From 034c82a9047dc82c0a6a8caae80841669094c595 Mon Sep 17 00:00:00 2001
From: borzechof99 <borzechof99@mi.fu-berlin.de>
Date: Sat, 26 Jun 2021 18:28:01 +0200
Subject: [PATCH] Implement Scraping and Diff Handling of Sports

---
 unisportomat/quiz/tests.py            |  27 +++++
 unisportomat/quiz/views.py            | 156 ++++++++++++++++++++++++++
 unisportomat/unisportomat/settings.py |   3 +-
 unisportomat/unisportomat/urls.py     |   1 +
 4 files changed, 186 insertions(+), 1 deletion(-)

diff --git a/unisportomat/quiz/tests.py b/unisportomat/quiz/tests.py
index ea681b2..96746d9 100644
--- a/unisportomat/quiz/tests.py
+++ b/unisportomat/quiz/tests.py
@@ -570,3 +570,30 @@ class APITest(APITestCase):
 
         self.assertEqual(response.data[0]["id"], 1)
         self.assertEqual(response.data[0]["name"], "Jiu Jitsu")
+
+    def test_sport_scraper(self):
+        """
+        Tests what the sport scraping API returns and that posting the diff back updates the database
+        """
+
+        # Get small-sport-list
+        sport_response = self.client.get(reverse("small-sport-list-list"))
+        self.assertEqual(len(sport_response.data["results"]), 1)
+
+        # Get Diff List
+        response = self.client.get(reverse("scraper"))
+
+        # Check the length of the diff list
+        # (might break in a new semester when the offered sports change!)
+        self.assertEqual(len(response.data["results"]), 121)
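+        # 121 entries are presumably the 120 sports scraped from the website ("new")
+        # plus the one pre-existing sport, which is no longer offered ("to_be_archived")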
+
+        # Send the diff back to the server
+        response = self.client.post(
+            reverse("scraper"), response.data["results"], format="json"
+        )
+
+        sport_response = self.client.get(reverse("small-sport-list-list"))
+        self.assertEqual(len(sport_response.data["results"]), 120)
+
+        sport_response = self.client.get(reverse("archive"))
+        self.assertEqual(len(sport_response.data), 1)
diff --git a/unisportomat/quiz/views.py b/unisportomat/quiz/views.py
index b800b0b..c125cc6 100644
--- a/unisportomat/quiz/views.py
+++ b/unisportomat/quiz/views.py
@@ -9,6 +9,9 @@ from rest_framework.response import Response
 from django.shortcuts import get_object_or_404
 from django.http import HttpResponse
 from .pagination import PageNumberWithPageSizePagination
+import copy
+
+from .course_scraper.course_scraper import scraping
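+# scraping() presumably returns a dict mapping sport name -> course URL (see ScraperView.get)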
 
 from .serializers import (
     SmallSportListSerializer,
@@ -299,3 +302,156 @@ class SportArchiveView(APIView):
         response = ArchiveSerializer(archived_sports)
 
         return Response(response.data)
+
+
+class ScraperView(APIView):
+    """
+    View for the sports scraper: GET computes the diff between database and website, POST applies it
+    """
+
+    def get(self, request):
+
+        # Scrape the sports currently offered on the course website
+        scraped_sports = scraping()
+
+        # Iterate over the active Sport entries in the database and classify
+        # each one against the newly scraped sports
+        diff_list = []
+
+        id_counter = 1
+
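+        # Template for a single diff entry; deep-copied below so the nested
+        # dicts are not shared between entries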
+        empty_diff_dict = {
+            "id": -1,
+            "kind_of_diff": "",
+            "old_sport": {
+                "id": -1,
+                "name": "",
+                "url": "",
+                "last_used": "",
+            },
+            "new_sport": {
+                "name": "",
+                "url": "",
+            },
+        }
+
+        for old_sport in Sport.objects.filter(currently_active=True):
+
+            diff_dict = copy.deepcopy(empty_diff_dict)
+
+            diff_dict["id"] = id_counter
+
+            diff_dict["old_sport"]["id"] = old_sport.pk
+            diff_dict["old_sport"]["name"] = old_sport.name
+            diff_dict["old_sport"]["url"] = old_sport.url
+            diff_dict["old_sport"]["last_used"] = old_sport.last_used
+
+            if old_sport.name in scraped_sports:
+                # A currently active Sport is also found among the newly scraped sports
+                # -> "same" Sport
+
+                diff_dict["kind_of_diff"] = "same"
+
+                new_sport_url = scraped_sports.pop(old_sport.name)
+
+                diff_dict["new_sport"]["name"] = old_sport.name
+                diff_dict["new_sport"]["url"] = new_sport_url
+
+            else:
+                # A currently active Sport is _not_ found among the newly scraped sports
+                # -> "to_be_archived" Sport
+
+                diff_dict["kind_of_diff"] = "to_be_archived"
+
+            diff_list.append(diff_dict)
+            id_counter += 1
+
+        for new_sport_name, new_sport_url in scraped_sports.items():
+
+            # The query should match either zero or one Sport.
+            # The name is technically not a primary-key candidate, but a sport should not appear more than once
+            query = Sport.objects.filter(name=new_sport_name)
+
+            diff_dict = copy.deepcopy(empty_diff_dict)
+
+            diff_dict["id"] = id_counter
+
+            diff_dict["new_sport"]["name"] = new_sport_name
+            diff_dict["new_sport"]["url"] = new_sport_url
+
+            if query.count() == 0:
+                # The new Sport is not found in the Archive, so it is completely new
+                # -> "new" Sport
+
+                diff_dict["kind_of_diff"] = "new"
+
+            else:
+                # The new Sport is in the archive, so it needs to be reactivated
+                # -> "from_archive" Sport
+
+                old_sport = query.get(name=new_sport_name)
+
+                diff_dict["kind_of_diff"] = "from_archive"
+
+                diff_dict["old_sport"]["id"] = old_sport.pk
+                diff_dict["old_sport"]["name"] = old_sport.name
+                diff_dict["old_sport"]["url"] = old_sport.url
+                diff_dict["old_sport"]["last_used"] = old_sport.last_used
+
+            diff_list.append(diff_dict)
+            id_counter += 1
+
+        # Paginate the diff list
+
+        paginator = PageNumberWithPageSizePagination()
+        paginated_list = paginator.paginate_queryset(diff_list, request)
+
+        return paginator.get_paginated_response(paginated_list)
+
+    def post(self, request):
+        """
+        Receives the list of diffs from the frontend and writes the changes to the database
+        """
+
+        diff_data = request.data
+
+        for diff in diff_data:
+
+            if diff["kind_of_diff"] == "new":
+
+                sport = Sport.objects.create_sport()
+                sport.name = diff["new_sport"]["name"]
+                sport.url = diff["new_sport"]["url"]
+
+            elif diff["kind_of_diff"] == "same":
+                # In case the URL changed, "same" also updates it
+
+                sport = Sport.objects.get(pk=diff["old_sport"]["id"])
+
+                sport.url = diff["new_sport"]["url"]
+
+                # Reactivate to update last_used
+                sport.reactivate()
+
+            elif diff["kind_of_diff"] == "to_be_archived":
+
+                sport = Sport.objects.get(pk=diff["old_sport"]["id"])
+                sport.currently_active = False
+
+            else:
+                # -> from_archive
+
+                sport = Sport.objects.get(pk=diff["old_sport"]["id"])
+                sport.reactivate()
+
+                # Same as in "same", url could be different
+                sport.url = diff["new_sport"]["url"]
+
+            sport.save()
+
+        # TODO: Maybe Redirect?
+        return Response(status=200)
diff --git a/unisportomat/unisportomat/settings.py b/unisportomat/unisportomat/settings.py
index e41e887..24d2a9b 100644
--- a/unisportomat/unisportomat/settings.py
+++ b/unisportomat/unisportomat/settings.py
@@ -60,7 +60,8 @@ MIDDLEWARE = [
 # SOURCE: https://github.com/bmihelac/ra-data-django-rest-framework
 REST_FRAMEWORK = {
     "DEFAULT_PAGINATION_CLASS": "quiz.pagination.PageNumberWithPageSizePagination",
-    "PAGE_SIZE": 10,
+    # The page size can be chosen by the frontend, so we set a high default to simplify testing
+    "PAGE_SIZE": 1000,
 }
 
 ROOT_URLCONF = "unisportomat.urls"
diff --git a/unisportomat/unisportomat/urls.py b/unisportomat/unisportomat/urls.py
index 6d5b897..1ad9220 100644
--- a/unisportomat/unisportomat/urls.py
+++ b/unisportomat/unisportomat/urls.py
@@ -34,4 +34,5 @@ urlpatterns = [
     ),
     path("api/admin/criteria/", views.CriteriaView.as_view(), name="criteria"),
     path("api/admin/sport/archive/", views.SportArchiveView.as_view(), name="archive"),
+    path("api/admin/sport/scraper/", views.ScraperView.as_view(), name="scraper"),
 ]
-- 
GitLab