Skip to content
Snippets Groups Projects
Commit 0f918597 authored by borzechof99's avatar borzechof99 :whale2:
Browse files

Implement Scraping and Diff Handling of Sports

parent aaf4377c
No related branches found
No related tags found
No related merge requests found
......@@ -565,3 +565,30 @@ class APITest(APITestCase):
self.assertEqual(response.data[0]["id"], 1)
self.assertEqual(response.data[0]["name"], "Jiu Jitsu")
def test_sport_scraper(self):
    """
    Exercise the sport-scraper endpoint: GET yields a diff list,
    POST applies that diff to the database.
    """
    # Before applying the diff, exactly one sport is in the small list.
    before = self.client.get(reverse("small-sport-list-list"))
    self.assertEqual(len(before.data["results"]), 1)

    # Fetch the diff between the scraped website and the database.
    # (Count might break in a new semester with new sports!)
    diff_response = self.client.get(reverse("scraper"))
    self.assertEqual(len(diff_response.data["results"]), 121)

    # Echo the diff back so the server writes it into the database.
    self.client.post(
        reverse("scraper"), diff_response.data["results"], format="json"
    )

    # The scraped sports are now active in the small list...
    after = self.client.get(reverse("small-sport-list-list"))
    self.assertEqual(len(after.data["results"]), 120)

    # ...and the one sport missing from the scrape was archived.
    archived = self.client.get(reverse("archive"))
    self.assertEqual(len(archived.data), 1)
......@@ -9,6 +9,9 @@ from rest_framework.response import Response
from django.shortcuts import get_object_or_404
from django.http import HttpResponse
from .pagination import PageNumberWithPageSizePagination
import copy
from .course_scraper.course_scraper import scraping
from .serializers import (
SmallSportListSerializer,
......@@ -297,3 +300,156 @@ class SportArchiveView(APIView):
response = ArchiveSerializer(archived_sports)
return Response(response.data)
class ScraperView(APIView):
    """
    View for the Scraper, including GET and POST.

    GET scrapes the sports website and returns a paginated list of diffs
    between the scraped sports and the sports currently in the database.
    POST receives that diff list back from the frontend and applies it.
    """

    @staticmethod
    def _empty_diff(diff_id):
        """Return a fresh diff skeleton carrying the given running id."""
        return {
            "id": diff_id,
            "kind_of_diff": "",
            "old_sport": {
                "id": -1,
                "name": "",
                "url": "",
                "last_used": "",
            },
            "new_sport": {
                "name": "",
                "url": "",
            },
        }

    @staticmethod
    def _copy_old_sport(diff_dict, sport):
        """Copy the DB sport's fields into the diff's "old_sport" slot."""
        diff_dict["old_sport"]["id"] = sport.pk
        diff_dict["old_sport"]["name"] = sport.name
        diff_dict["old_sport"]["url"] = sport.url
        diff_dict["old_sport"]["last_used"] = sport.last_used

    def get(self, request):
        """
        Scrape the sports website and return a paginated diff list.

        Each diff entry is one of:
        - "same": active sport still on the website (URL may have changed)
        - "to_be_archived": active sport no longer on the website
        - "new": scraped sport unknown to the database
        - "from_archive": scraped sport exists in the DB but is inactive
        """
        # Scrape sports from their website: mapping of name -> url.
        scraped_sports = scraping()

        diff_list = []
        id_counter = 1

        # Compare every currently active DB sport against the scrape.
        for old_sport in Sport.objects.filter(currently_active=True):
            diff_dict = self._empty_diff(id_counter)
            self._copy_old_sport(diff_dict, old_sport)

            if old_sport.name in scraped_sports:
                # Still on the website -> "same".
                # pop() so only genuinely unmatched names remain below.
                diff_dict["kind_of_diff"] = "same"
                diff_dict["new_sport"]["name"] = old_sport.name
                diff_dict["new_sport"]["url"] = scraped_sports.pop(old_sport.name)
            else:
                # Disappeared from the website -> "to_be_archived".
                diff_dict["kind_of_diff"] = "to_be_archived"

            diff_list.append(diff_dict)
            id_counter += 1

        # Everything left in scraped_sports had no active DB counterpart.
        for new_sport_name, new_sport_url in scraped_sports.items():
            diff_dict = self._empty_diff(id_counter)
            diff_dict["new_sport"]["name"] = new_sport_name
            diff_dict["new_sport"]["url"] = new_sport_url

            # The name is technically no PK candidate, but sports shouldn't
            # be included more than once, so this matches zero or one row.
            query = Sport.objects.filter(name=new_sport_name)
            if query.exists():
                # Known but inactive -> resurrect "from_archive".
                diff_dict["kind_of_diff"] = "from_archive"
                self._copy_old_sport(diff_dict, query.get())
            else:
                # Completely unknown -> "new".
                diff_dict["kind_of_diff"] = "new"

            diff_list.append(diff_dict)
            id_counter += 1

        # Paginate it all!
        paginator = PageNumberWithPageSizePagination()
        page = paginator.paginate_queryset(diff_list, request)
        return paginator.get_paginated_response(page)

    def post(self, request):
        """
        Gets a list of diffs from the frontend and writes them into the database.
        """
        for diff in request.data:
            kind = diff["kind_of_diff"]
            if kind == "new":
                sport = Sport.objects.create_sport()
                sport.name = diff["new_sport"]["name"]
                sport.url = diff["new_sport"]["url"]
            elif kind == "same":
                # In case of URL changes, "same" updates the URL.
                sport = Sport.objects.get(pk=diff["old_sport"]["id"])
                sport.url = diff["new_sport"]["url"]
                # To update last_used.
                sport.reactivate()
            elif kind == "to_be_archived":
                sport = Sport.objects.get(pk=diff["old_sport"]["id"])
                sport.currently_active = False
            else:
                # -> "from_archive"
                sport = Sport.objects.get(pk=diff["old_sport"]["id"])
                sport.reactivate()
                # Same as in "same": the URL could be different.
                sport.url = diff["new_sport"]["url"]
            sport.save()
        # TODO: Maybe Redirect?
        return Response(status=200)
......@@ -60,7 +60,8 @@ MIDDLEWARE = [
# SOURCE: https://github.com/bmihelac/ra-data-django-rest-framework
REST_FRAMEWORK = {
"DEFAULT_PAGINATION_CLASS": "quiz.pagination.PageNumberWithPageSizePagination",
"PAGE_SIZE": 10,
# Page Size can be chosen by Frontend, so we set it high internally to streamline testing
"PAGE_SIZE": 1000,
}
ROOT_URLCONF = "unisportomat.urls"
......
......@@ -34,4 +34,5 @@ urlpatterns = [
),
path("api/admin/criteria/", views.CriteriaView.as_view(), name="criteria"),
path("api/admin/sport/archive/", views.SportArchiveView.as_view(), name="archive"),
path("api/admin/sport/scraper/", views.ScraperView.as_view(), name="scraper"),
]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment