Skip to content
Snippets Groups Projects
Commit b464dbd4 authored by borzechof99's avatar borzechof99 :whale2:
Browse files

Merge branch...

Merge branch '49-implement-sport-scraper-and-sport-archive-api-endpoints-for-admin-frontend' into 'master'

Implemented Sport Scraper API, Business Logic, Archive

Closes #49

See merge request swp-unisport/team-warumkeinrust/unisport-o-mat!48
parents 1bb726f1 4951361a
No related branches found
No related tags found
No related merge requests found
......@@ -5,6 +5,7 @@ for http://www.buchsys.de for SWP UniSport-O-Mat.
import requests
from bs4 import BeautifulSoup
from collections import OrderedDict
def fetch_website(url):
......@@ -27,14 +28,14 @@ def fetch_website(url):
raise
def scraping(site=None) -> dict:
def scraping(site=None) -> OrderedDict:
"""
Returns a dictionary of the form {name: link},
containing the scraped content of
https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/index.html,
unless another URL is given as an argument.
"""
courses = {}
courses = OrderedDict()
if site is None:
site = "https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/"
......
......@@ -3,6 +3,7 @@ Testing module, yo. Just for the course_scraper.py.
"""
from django.test import TestCase
from course_scraper import scraping # , fetch_website
from collections import OrderedDict
class ScraperTestCase(TestCase):
......@@ -15,7 +16,7 @@ class ScraperTestCase(TestCase):
"""
Testing return type of scraping().
"""
self.assertIsInstance(scraping(), dict)
self.assertIsInstance(scraping(), OrderedDict)
def test_dict_not_empty(self):
"""
......
......@@ -42,7 +42,7 @@ class SportManager(models.Manager):
def create_sport(self, **kwargs):
"""
Creates new Sport Object and every CriterionRating for it
Creates new sport object and every CriterionRating for it
"""
sport = self.create(**kwargs)
sport.currently_active = True
......@@ -56,10 +56,10 @@ class SportManager(models.Manager):
class Sport(models.Model):
"""
Defines a Sport with name, url that leads to the booking page.
Defines a sport with name, url that leads to the booking page.
A sport includes ratings for all criterions.
(e.g. How much it corresponds to the criterion "Martial Arts")
TODO: last_used may be changed in the future to better work with other functionalities, eg Statistics.
TODO: last_used may be changed in the future to better work with other functionalities, eg. statistics.
"""
name = models.TextField()
......@@ -67,7 +67,7 @@ class Sport(models.Model):
criteria_ratings = models.ManyToManyField("Criterion", through="CriterionRating")
# The Date Field last_used is set to now-time every time a sport is activated
# either through manual activation or activation through the scraper
# Either through manual activation or activation through the scraper
last_used = models.DateField(default=timezone.localdate)
# Boolean currently_active states whether the sport is in the archive or not
......
......@@ -206,6 +206,32 @@ class CriteriaSerializer(serializers.BaseSerializer):
return criteria_list
class ArchiveSerializer(serializers.BaseSerializer):
    """
    Serializer that renders sports the way the archive needs them
    """

    def to_representation(self, sport_list):
        """
        Turns an iterable of Sport objects into a list of dictionaries,
        one per sport, each holding id, name, last_used and url
        """

        return [
            {
                "id": sport.pk,
                "name": sport.name,
                "last_used": sport.last_used,
                "url": sport.url,
            }
            for sport in sport_list
        ]
class GreetingEndSerializer(serializers.BaseSerializer):
"""
Serializer for GreetingText and EndText
......
......@@ -536,7 +536,7 @@ class APITest(APITestCase):
def test_currently_active(self):
"""
Tests if PATCHing the "currently_active" value to false correctly changes the sport
Tests if PATCHing the 'currently_active' value to false correctly changes the sport
"""
# Set Up Values
......@@ -550,6 +550,57 @@ class APITest(APITestCase):
self.assertEqual(response.data["currently_active"], False)
def test_sport_archive(self):
    """
    Checks that a sport shows up in the archive once it is deactivated
    """

    archive_url = reverse("archive")

    # The archive starts out empty
    archive_before = self.client.get(archive_url).data["results"]
    self.assertEqual(archive_before, [])

    # Deactivate the sport with pk 1
    self.client.patch(
        reverse("small-sport-list-detail", kwargs={"pk": 1}),
        data={"currently_active": False},
        format="json",
    )

    # The deactivated sport is now the first archive entry
    first_archived = self.client.get(archive_url).data["results"][0]
    self.assertEqual(first_archived["id"], 1)
    self.assertEqual(first_archived["name"], "Jiu Jitsu")
def test_sport_scraper(self):
    """
    Runs a full scraper round trip: fetch the diff list, send it back
    unchanged and check the resulting sport list and archive
    """

    sport_list_url = reverse("small-sport-list-list")
    scraper_url = reverse("scraper")

    # Exactly one sport is active before scraping
    active_before = self.client.get(sport_list_url).data["results"]
    self.assertEqual(len(active_before), 1)

    # Fetch the diff list.
    # When this test was written, buchsys listed 121 sports; presumably the
    # one sport already in the database is not among them, which gives 122 diffs.
    # (Might break in a new semester with new sports!)
    diffs = self.client.get(scraper_url).data["results"]
    self.assertEqual(len(diffs), 122)

    # Send the diff list back to the server unchanged
    self.client.post(scraper_url, diffs, format="json")

    # Afterwards 121 sports are active ...
    active_after = self.client.get(sport_list_url).data["results"]
    self.assertEqual(len(active_after), 121)

    # ... and one sport sits in the archive
    archived = self.client.get(reverse("archive")).data["results"]
    self.assertEqual(len(archived), 1)
def test_greeting_view(self):
"""
Test whether the greeting behaves correctly
......
......@@ -2,7 +2,7 @@
Defines the views for the API
"""
# from django.shortcuts import render
import copy
from rest_framework import viewsets
from rest_framework.views import APIView
from rest_framework.response import Response
......@@ -10,6 +10,8 @@ from django.shortcuts import get_object_or_404
from django.http import HttpResponse
from .pagination import PageNumberWithPageSizePagination
from .course_scraper.course_scraper import scraping
from .serializers import (
SmallSportListSerializer,
SportListSerializer,
......@@ -18,6 +20,7 @@ from .serializers import (
SingleSportSerializer,
CriteriaSerializer,
IncompleteSportSerializer,
ArchiveSerializer,
GreetingEndSerializer,
)
from .models import Sport, Criterion, Question, GreetingText, EndText
......@@ -284,6 +287,178 @@ class CriteriaView(APIView):
return Response(response.data)
class SportArchiveView(APIView):
    """
    Lists the archived sports, i.e. every sport with currently_active = False
    """

    def get(self, request):
        """
        GET for api/admin/sport/archive/
        Answers with one page of archived sports, ordered by name
        """

        archive_query = Sport.objects.filter(currently_active=False).order_by("name")

        paginator = PageNumberWithPageSizePagination()
        current_page = paginator.paginate_queryset(archive_query, request)

        return paginator.get_paginated_response(ArchiveSerializer(current_page).data)
class ScraperView(APIView):
    """
    View for the scraper, including GET and POST

    GET scrapes the default website and answers with a paginated list of diffs
    between the currently active sports and the scraped ones.
    POST takes such a list of diffs and writes it into the database.
    """

    # Every value of "kind_of_diff" that GET emits and POST accepts
    KNOWN_KINDS_OF_DIFF = ("new", "same", "to_be_archived", "from_archive")

    @staticmethod
    def _build_diff(diff_id, kind_of_diff, old_sport=None, new_name="", new_url=""):
        """
        Builds one diff entry in the format used by GET and POST.

        Without an old_sport the "old_sport" part keeps its placeholders
        (id -1 and empty strings); new_name and new_url default to "".
        """

        if old_sport is None:
            old_entry = {"id": -1, "name": "", "url": "", "last_used": ""}
        else:
            old_entry = {
                "id": old_sport.pk,
                "name": old_sport.name,
                "url": old_sport.url,
                "last_used": old_sport.last_used,
            }

        return {
            "id": diff_id,
            "kind_of_diff": kind_of_diff,
            "old_sport": old_entry,
            "new_sport": {
                "name": new_name,
                "url": new_url,
            },
        }

    def get(self, request):
        """
        Scrapes the sports currently on the default website,
        and sends a diff of the current sports and the scraped ones

        Every diff has a running id (starting at 1) and one of four kinds:
        "same", "to_be_archived", "new" or "from_archive".
        """

        # Scrape sports from their website, as a mapping of name to url
        scraped_sports = scraping()

        diff_list = []

        # First pass: every active sport is either still offered or has to be archived
        for old_sport in Sport.objects.filter(currently_active=True):

            if old_sport.name in scraped_sports:
                # A sport currently active is also found in the scraped sports
                # -> "same" sport. It is popped so that only sports without
                # an active counterpart are left for the second pass.
                new_sport_url = scraped_sports.pop(old_sport.name)
                diff_dict = self._build_diff(
                    len(diff_list) + 1,
                    "same",
                    old_sport,
                    old_sport.name,
                    new_sport_url,
                )
            else:
                # A sport currently active is _not_ found in the scraped sports
                # -> "to_be_archived" sport
                diff_dict = self._build_diff(
                    len(diff_list) + 1, "to_be_archived", old_sport
                )

            diff_list.append(diff_dict)

        # Second pass: the remaining scraped sports are either new or archived
        for new_sport_name, new_sport_url in scraped_sports.items():

            # The name is technically not a primary key candidate, so it could
            # be stored more than once. first() needs a single query and picks
            # one entry instead of failing on such duplicates.
            stored_sport = Sport.objects.filter(name=new_sport_name).first()

            # No stored sport -> "new", otherwise it needs to be resurrected
            # -> "from_archive"
            kind_of_diff = "new" if stored_sport is None else "from_archive"

            diff_list.append(
                self._build_diff(
                    len(diff_list) + 1,
                    kind_of_diff,
                    stored_sport,
                    new_sport_name,
                    new_sport_url,
                )
            )

        # Paginate it all!
        paginator = PageNumberWithPageSizePagination()
        paginated_list = paginator.paginate_queryset(diff_list, request)

        return paginator.get_paginated_response(paginated_list)

    def post(self, request):
        """
        Gets list of diffs from Frontend and writes them into the database

        Answers with status 400 and writes nothing if any diff carries a
        "kind_of_diff" that is not listed in KNOWN_KINDS_OF_DIFF.
        Otherwise every diff is applied in order and status 200 is returned.
        """

        diff_data = request.data

        # Check all kinds before the first write, so that an unknown kind is
        # rejected instead of being silently handled like "from_archive"
        unknown_kinds = [
            diff["kind_of_diff"]
            for diff in diff_data
            if diff["kind_of_diff"] not in self.KNOWN_KINDS_OF_DIFF
        ]
        if unknown_kinds:
            return Response(
                {"detail": "Unknown kind_of_diff: {}".format(unknown_kinds)},
                status=400,
            )

        for diff in diff_data:

            kind_of_diff = diff["kind_of_diff"]

            if kind_of_diff == "new":

                sport = Sport.objects.create_sport()
                sport.name = diff["new_sport"]["name"]
                sport.url = diff["new_sport"]["url"]

            elif kind_of_diff == "same":

                # In case of URL changes, "same" updates the URL
                sport = Sport.objects.get(pk=diff["old_sport"]["id"])
                sport.url = diff["new_sport"]["url"]

                # To update last_used
                sport.reactivate()

            elif kind_of_diff == "to_be_archived":

                sport = Sport.objects.get(pk=diff["old_sport"]["id"])
                sport.currently_active = False

            else:
                # Only "from_archive" is left after the check above
                sport = Sport.objects.get(pk=diff["old_sport"]["id"])
                sport.reactivate()

                # Same as in "same", URL could be different
                sport.url = diff["new_sport"]["url"]

            sport.save()

        return Response(status=200)
class GreetingEndView(APIView):
"""
View for handling the beginning sentence
......
......@@ -60,7 +60,8 @@ MIDDLEWARE = [
# SOURCE: https://github.com/bmihelac/ra-data-django-rest-framework
REST_FRAMEWORK = {
"DEFAULT_PAGINATION_CLASS": "quiz.pagination.PageNumberWithPageSizePagination",
# NOTE(review): "PAGE_SIZE" appears twice in this literal (presumably the old
# and the new side of the diff). Python keeps the last entry, so 1000 applies.
"PAGE_SIZE": 10,
# Page size can be chosen by Frontend, so we set it high internally to streamline testing
"PAGE_SIZE": 1000,
}
ROOT_URLCONF = "unisportomat.urls"
......
......@@ -33,6 +33,8 @@ urlpatterns = [
name="incomplete",
),
path("api/admin/criteria/", views.CriteriaView.as_view(), name="criteria"),
path("api/admin/sport/archive/", views.SportArchiveView.as_view(), name="archive"),
path("api/admin/sport/scraper/", views.ScraperView.as_view(), name="scraper"),
path("api/admin/greeting/", views.GreetingView.as_view(), name="greeting"),
path("api/admin/end/", views.EndView.as_view(), name="end"),
]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment