From 034c82a9047dc82c0a6a8caae80841669094c595 Mon Sep 17 00:00:00 2001
From: borzechof99 <borzechof99@mi.fu-berlin.de>
Date: Sat, 26 Jun 2021 18:28:01 +0200
Subject: [PATCH] Implement Scraping and Diff Handling of Sports

---
Review notes (this section is ignored by `git am`):

* The whitespace of the reviewed copy of this patch was collapsed. The
  indentation and the position of the blank context line in the hunks for
  tests.py, views.py (second hunk) and settings.py are reconstructed and
  have to be checked against the tree before applying.
* The blob ids in the "index" lines are the ones of the original commit;
  they do not describe the revised contents of tests.py and views.py.
* tests.py: the scraper is replaced by a fixed mapping, so the test no
  longer needs the network and no longer breaks with every new semester.
  This assumes that the views module is importable as "quiz.views" and
  that the test fixture contains no archived Sport.
* views.py: GET no longer fails on a name that is stored more than once
  and no longer builds an unused list. POST validates the whole list
  first (status 400), answers an unknown Sport id with 404 and writes
  all diffs in one transaction.

 unisportomat/quiz/tests.py            |  38 ++++
 unisportomat/quiz/views.py            | 217 ++++++++++++++++++++++++++
 unisportomat/unisportomat/settings.py |   3 +-
 unisportomat/unisportomat/urls.py     |   1 +
 4 files changed, 258 insertions(+), 1 deletion(-)

diff --git a/unisportomat/quiz/tests.py b/unisportomat/quiz/tests.py
index ea681b2..96746d9 100644
--- a/unisportomat/quiz/tests.py
+++ b/unisportomat/quiz/tests.py
@@ -570,3 +570,41 @@ class APITest(APITestCase):
 
         self.assertEqual(response.data[0]["id"], 1)
         self.assertEqual(response.data[0]["name"], "Jiu Jitsu")
+
+    def test_sport_scraper(self):
+        """
+        Tests the diff list of the Sport Scraping API and writing it back
+        """
+
+        # Local import, as only this test needs to replace the scraper
+        from unittest import mock
+
+        # Fixed stand-in for the scraped website, so the counts below stay stable
+        scraped_sports = {
+            "Scraper Test Sport A": "https://example.org/sport-a",
+            "Scraper Test Sport B": "https://example.org/sport-b",
+        }
+
+        # Get small-sport-list
+        sport_response = self.client.get(reverse("small-sport-list-list"))
+        self.assertEqual(len(sport_response.data["results"]), 1)
+
+        # Get Diff List
+        with mock.patch("quiz.views.scraping", return_value=scraped_sports):
+            response = self.client.get(reverse("scraper"))
+
+        # One diff for the active Sport plus one for each scraped Sport
+        self.assertEqual(len(response.data["results"]), 3)
+
+        # Send Diff back to Server
+        response = self.client.post(
+            reverse("scraper"), response.data["results"], format="json"
+        )
+        self.assertEqual(response.status_code, 200)
+
+        # The two scraped Sports are active now, the old one is archived
+        sport_response = self.client.get(reverse("small-sport-list-list"))
+        self.assertEqual(len(sport_response.data["results"]), 2)
+
+        sport_response = self.client.get(reverse("archive"))
+        self.assertEqual(len(sport_response.data), 1)
diff --git a/unisportomat/quiz/views.py b/unisportomat/quiz/views.py
index b800b0b..c125cc6 100644
--- a/unisportomat/quiz/views.py
+++ b/unisportomat/quiz/views.py
@@ -9,6 +9,11 @@ from rest_framework.response import Response
 from django.shortcuts import get_object_or_404
 from django.http import HttpResponse
 from .pagination import PageNumberWithPageSizePagination
+import copy
+
+from django.db import transaction
+
+from .course_scraper.course_scraper import scraping
 
 from .serializers import (
     SmallSportListSerializer,
@@ -299,3 +304,215 @@ class SportArchiveView(APIView):
 
         response = ArchiveSerializer(archived_sports)
         return Response(response.data)
+
+
+class ScraperView(APIView):
+    """
+    View for the Scraper, including GET and POST
+
+    GET scrapes the offered Sports and returns the list of diffs between them
+    and the Sports in the database. POST takes such a list of diffs and writes
+    it into the database.
+    """
+
+    # Every kind of diff that GET emits and POST accepts
+    KINDS_OF_DIFF = ("new", "same", "to_be_archived", "from_archive")
+
+    # Placeholder values for the side of a diff that does not exist
+    EMPTY_DIFF_DICT = {
+        "id": -1,
+        "kind_of_diff": "",
+        "old_sport": {
+            "id": -1,
+            "name": "",
+            "url": "",
+            "last_used": "",
+        },
+        "new_sport": {
+            "name": "",
+            "url": "",
+        },
+    }
+
+    @classmethod
+    def _build_diff(cls, diff_id, kind_of_diff, old_sport=None, new_sport=None):
+        """
+        Builds one diff entry
+
+        old_sport is a Sport or None, new_sport is a (name, url) pair or None.
+        A side that is None keeps its placeholder values.
+        """
+
+        # Deep copy, so the nested dicts of the template are never shared
+        diff_dict = copy.deepcopy(cls.EMPTY_DIFF_DICT)
+
+        diff_dict["id"] = diff_id
+        diff_dict["kind_of_diff"] = kind_of_diff
+
+        if old_sport is not None:
+            diff_dict["old_sport"]["id"] = old_sport.pk
+            diff_dict["old_sport"]["name"] = old_sport.name
+            diff_dict["old_sport"]["url"] = old_sport.url
+            diff_dict["old_sport"]["last_used"] = old_sport.last_used
+
+        if new_sport is not None:
+            diff_dict["new_sport"]["name"] = new_sport[0]
+            diff_dict["new_sport"]["url"] = new_sport[1]
+
+        return diff_dict
+
+    @classmethod
+    def _is_valid_diff(cls, diff):
+        """
+        Checks whether a diff sent to POST has a known kind of diff and carries
+        every field that POST reads for that kind
+        """
+
+        if not isinstance(diff, dict):
+            return False
+
+        kind_of_diff = diff.get("kind_of_diff")
+        if kind_of_diff not in cls.KINDS_OF_DIFF:
+            return False
+
+        if kind_of_diff != "new":
+            # Every other kind addresses an existing Sport by its id
+            old_sport = diff.get("old_sport")
+            if not isinstance(old_sport, dict):
+                return False
+            if not isinstance(old_sport.get("id"), int):
+                return False
+
+        if kind_of_diff == "to_be_archived":
+            # Archiving does not read the new Sport
+            return True
+
+        new_sport = diff.get("new_sport")
+        if not isinstance(new_sport, dict) or "url" not in new_sport:
+            return False
+
+        return kind_of_diff != "new" or "name" in new_sport
+
+    def get(self, request):
+        """
+        Scrapes the Sports and returns the paginated list of diffs between them
+        and the Sports in the database
+        """
+
+        # Copied, because every name matched to an active Sport is removed below
+        scraped_sports = dict(scraping())
+
+        diff_list = []
+
+        # Iterate through active DB Sport entries to see which ones were scraped
+        for old_sport in Sport.objects.filter(currently_active=True):
+
+            if old_sport.name in scraped_sports:
+                # A Sport currently active is also found in the new scraped sports
+                # -> "same" Sport
+
+                new_sport_url = scraped_sports.pop(old_sport.name)
+
+                diff_dict = self._build_diff(
+                    len(diff_list) + 1,
+                    "same",
+                    old_sport=old_sport,
+                    new_sport=(old_sport.name, new_sport_url),
+                )
+
+            else:
+                # A Sport currently active is _not_ found in the new scraped sports
+                # -> "to_be_archived" Sport
+
+                diff_dict = self._build_diff(
+                    len(diff_list) + 1, "to_be_archived", old_sport=old_sport
+                )
+
+            diff_list.append(diff_dict)
+
+        # The scraped Sports left over match no currently active Sport
+        for new_sport in scraped_sports.items():
+
+            # The Name is technically no PK-Candidate, so first() is used to not
+            # fail on a name that is stored more than once
+            old_sport = Sport.objects.filter(name=new_sport[0]).first()
+
+            if old_sport is None:
+                # The new Sport is not found in the Archive, so it is completely new
+                # -> "new" Sport
+
+                kind_of_diff = "new"
+
+            else:
+                # The new Sport is in the Archive, so it needs to be resurrected
+                # -> "from_archive" Sport
+
+                kind_of_diff = "from_archive"
+
+            diff_dict = self._build_diff(
+                len(diff_list) + 1,
+                kind_of_diff,
+                old_sport=old_sport,
+                new_sport=new_sport,
+            )
+            diff_list.append(diff_dict)
+
+        # Paginate it all!
+
+        paginator = PageNumberWithPageSizePagination()
+        paginated_list = paginator.paginate_queryset(diff_list, request)
+
+        return paginator.get_paginated_response(paginated_list)
+
+    def post(self, request):
+        """
+        Gets List of Diffs from Frontend and writes them into the Database
+
+        Answers with status 400 without writing anything if the body is not a
+        list of well-formed diffs. Nothing is written either if a diff refers
+        to a Sport that does not exist, which is answered with status 404.
+        """
+
+        diff_data = request.data
+
+        if not isinstance(diff_data, list):
+            return Response(status=400)
+
+        if not all(self._is_valid_diff(diff) for diff in diff_data):
+            return Response(status=400)
+
+        # All diffs are written in one transaction, so a failing diff cannot
+        # leave the Database with only a part of the list applied
+        with transaction.atomic():
+
+            for diff in diff_data:
+
+                kind_of_diff = diff["kind_of_diff"]
+
+                if kind_of_diff == "new":
+
+                    sport = Sport.objects.create_sport()
+                    sport.name = diff["new_sport"]["name"]
+                    sport.url = diff["new_sport"]["url"]
+
+                else:
+
+                    sport = get_object_or_404(Sport, pk=diff["old_sport"]["id"])
+
+                    if kind_of_diff == "to_be_archived":
+
+                        sport.currently_active = False
+
+                    else:
+                        # -> "same" or "from_archive"
+                        # In Case of URL changes, both update the URL
+
+                        sport.url = diff["new_sport"]["url"]
+
+                        # Updates last_used for "same", resurrects for "from_archive"
+                        sport.reactivate()
+
+                sport.save()
+
+        # TODO: Maybe Redirect?
+        return Response(status=200)
diff --git a/unisportomat/unisportomat/settings.py b/unisportomat/unisportomat/settings.py
index e41e887..24d2a9b 100644
--- a/unisportomat/unisportomat/settings.py
+++ b/unisportomat/unisportomat/settings.py
@@ -60,7 +60,8 @@ MIDDLEWARE = [
 # SOURCE: https://github.com/bmihelac/ra-data-django-rest-framework
 REST_FRAMEWORK = {
     "DEFAULT_PAGINATION_CLASS": "quiz.pagination.PageNumberWithPageSizePagination",
-    "PAGE_SIZE": 10,
+    # Page Size can be chosen by Frontend, so we set it high internally to streamline testing
+    "PAGE_SIZE": 1000,
 }
 
 ROOT_URLCONF = "unisportomat.urls"
diff --git a/unisportomat/unisportomat/urls.py b/unisportomat/unisportomat/urls.py
index 6d5b897..1ad9220 100644
--- a/unisportomat/unisportomat/urls.py
+++ b/unisportomat/unisportomat/urls.py
@@ -34,4 +34,5 @@ urlpatterns = [
     ),
     path("api/admin/criteria/", views.CriteriaView.as_view(), name="criteria"),
     path("api/admin/sport/archive/", views.SportArchiveView.as_view(), name="archive"),
+    path("api/admin/sport/scraper/", views.ScraperView.as_view(), name="scraper"),
 ]
-- 
GitLab