Skip to content
Snippets Groups Projects
Commit 034c82a9 authored by borzechof99's avatar borzechof99 :whale2:
Browse files

Implement Scraping and Diff Handling of Sports

parent b592868b
No related branches found
No related tags found
No related merge requests found
...@@ -570,3 +570,30 @@ class APITest(APITestCase):
        self.assertEqual(response.data[0]["id"], 1)
        self.assertEqual(response.data[0]["name"], "Jiu Jitsu")
def test_sport_scraper(self):
    """
    Exercise the scraper endpoint end to end.

    GET must deliver the diff list between the scraped website and the
    database; POSTing that list back must apply it, growing the active
    sport list and moving the dropped sport into the archive.
    """
    # Baseline: exactly one sport is active before scraping
    small_list = self.client.get(reverse("small-sport-list-list"))
    self.assertEqual(len(small_list.data["results"]), 1)

    # Fetch the diff list from the scraper
    # (Might break in a new semester with new sports!)
    diff_response = self.client.get(reverse("scraper"))
    self.assertEqual(len(diff_response.data["results"]), 121)

    # Send the diff back to the server to be applied
    self.client.post(
        reverse("scraper"), diff_response.data["results"], format="json"
    )

    # 120 sports are now active, one was archived
    small_list = self.client.get(reverse("small-sport-list-list"))
    self.assertEqual(len(small_list.data["results"]), 120)

    archive_response = self.client.get(reverse("archive"))
    self.assertEqual(len(archive_response.data), 1)
...@@ -9,6 +9,9 @@ from rest_framework.response import Response
from django.shortcuts import get_object_or_404
from django.http import HttpResponse
from .pagination import PageNumberWithPageSizePagination
import copy
from .course_scraper.course_scraper import scraping
from .serializers import (
    SmallSportListSerializer,
...@@ -299,3 +302,156 @@ class SportArchiveView(APIView):
        response = ArchiveSerializer(archived_sports)
        return Response(response.data)
class ScraperView(APIView):
    """
    View for the scraper, including GET and POST.

    GET scrapes the sports website and answers with a paginated list of
    diff entries describing how the scraped sports relate to the database.
    POST receives such a diff list back from the frontend and writes the
    described changes into the database.
    """

    @staticmethod
    def _blank_diff():
        """Return a fresh, unshared diff-entry skeleton."""
        return {
            "id": -1,
            "kind_of_diff": "",
            "old_sport": {
                "id": -1,
                "name": "",
                "url": "",
                "last_used": "",
            },
            "new_sport": {
                "name": "",
                "url": "",
            },
        }

    def get(self, request):
        """Build and return the paginated diff list."""
        # Scrape sports from their website; mapping of name -> url
        scraped_sports = scraping()

        diff_list = []

        # Pass 1: every currently active DB sport is either still on the
        # website ("same") or has disappeared ("to_be_archived").
        for db_sport in Sport.objects.filter(currently_active=True):
            entry = self._blank_diff()
            entry["id"] = len(diff_list) + 1
            entry["old_sport"]["id"] = db_sport.pk
            entry["old_sport"]["name"] = db_sport.name
            entry["old_sport"]["url"] = db_sport.url
            entry["old_sport"]["last_used"] = db_sport.last_used

            if db_sport.name in scraped_sports:
                # Active sport also found among the scraped ones -> "same".
                # Pop it so pass 2 only sees sports unknown to the DB.
                entry["kind_of_diff"] = "same"
                entry["new_sport"]["name"] = db_sport.name
                entry["new_sport"]["url"] = scraped_sports.pop(db_sport.name)
            else:
                # Active sport no longer on the website -> "to_be_archived"
                entry["kind_of_diff"] = "to_be_archived"

            diff_list.append(entry)

        # Pass 2: whatever remains in scraped_sports was not active in the
        # DB — it is either brand new or must be revived from the archive.
        for scraped_name, scraped_url in scraped_sports.items():
            entry = self._blank_diff()
            entry["id"] = len(diff_list) + 1
            entry["new_sport"]["name"] = scraped_name
            entry["new_sport"]["url"] = scraped_url

            # The query should hold zero or one rows: the name is technically
            # no PK candidate, but sports shouldn't be stored twice.
            query = Sport.objects.filter(name=scraped_name)
            if query.count() == 0:
                # Not even in the archive -> completely "new"
                entry["kind_of_diff"] = "new"
            else:
                # Present in the archive -> "from_archive" (resurrect it)
                archived = query.get(name=scraped_name)
                entry["kind_of_diff"] = "from_archive"
                entry["old_sport"]["id"] = archived.pk
                entry["old_sport"]["name"] = archived.name
                entry["old_sport"]["url"] = archived.url
                entry["old_sport"]["last_used"] = archived.last_used

            diff_list.append(entry)

        # Paginate it all!
        paginator = PageNumberWithPageSizePagination()
        page = paginator.paginate_queryset(diff_list, request)
        return paginator.get_paginated_response(page)

    def post(self, request):
        """
        Gets List of Diffs from Frontend and writes them into the Database
        """
        for diff in request.data:
            kind = diff["kind_of_diff"]

            if kind == "new":
                sport = Sport.objects.create_sport()
                sport.name = diff["new_sport"]["name"]
                sport.url = diff["new_sport"]["url"]
            elif kind == "same":
                # In case of URL changes, "same" updates the URL
                sport = Sport.objects.get(pk=diff["old_sport"]["id"])
                sport.url = diff["new_sport"]["url"]
                # reactivate() also refreshes last_used
                sport.reactivate()
            elif kind == "to_be_archived":
                sport = Sport.objects.get(pk=diff["old_sport"]["id"])
                sport.currently_active = False
            else:
                # -> from_archive
                sport = Sport.objects.get(pk=diff["old_sport"]["id"])
                sport.reactivate()
                # Same as in "same", url could be different
                sport.url = diff["new_sport"]["url"]

            sport.save()

        # TODO: Maybe Redirect?
        return Response(status=200)
...@@ -60,7 +60,8 @@ MIDDLEWARE = [
# SOURCE: https://github.com/bmihelac/ra-data-django-rest-framework
REST_FRAMEWORK = {
    "DEFAULT_PAGINATION_CLASS": "quiz.pagination.PageNumberWithPageSizePagination",
    # Page Size can be chosen by Frontend, so we set it high internally to streamline testing
    "PAGE_SIZE": 1000,
}
ROOT_URLCONF = "unisportomat.urls"
......
...@@ -34,4 +34,5 @@ urlpatterns = [
    ),
    path("api/admin/criteria/", views.CriteriaView.as_view(), name="criteria"),
    path("api/admin/sport/archive/", views.SportArchiveView.as_view(), name="archive"),
    path("api/admin/sport/scraper/", views.ScraperView.as_view(), name="scraper"),
]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment