Merge branch '7-parsen-von-buchsys-de' into 'master'

Resolve "Parsen von buchsys.de". Closes #7. See merge request swp-unisport/team-warumkeinrust/unisport-o-mat!6

Merge branch '7-parsen-von-buchsys-de' into 'master'
aba99400 · fu2662cw · 757d7273 · 45f32c76 · aba99400 · aba99400
Commit aba99400 authored 4 years ago by fu2662cw
--- a/requirements.txt
+++ b/requirements.txt
 asgiref==3.3.4
+beautifulsoup4==4.9.3
+certifi==2020.12.5
+chardet==4.0.0
 Django==3.2
+idna==2.10
 pytz==2021.1
-sqlparse==0.4.1
\ No newline at end of file
+requests==2.25.1
+soupsieve==2.2.1
+sqlparse==0.4.1
+urllib3==1.26.4
--- a/unisportomat/course_scraper/course_scraper.py
+++ b/unisportomat/course_scraper/course_scraper.py
+import requests
+from bs4 import BeautifulSoup
+
+
+def fetch_website(url, timeout=10):
+    """
+    Fetch a page and return the link tags of its course menu.
+
+    Uses requests to download the html page and BeautifulSoup to parse it,
+    then narrows the result to the ``<dl class="bs_menu">`` element.
+
+    :param url: address of the page to fetch
+    :param timeout: seconds to wait for the server before giving up
+    :return: list of ``<a>`` tags that carry an ``href`` attribute; an empty
+        list if the request fails or the page has no ``bs_menu`` section
+    """
+    try:
+        # get an object containing the web page's html; the timeout keeps a
+        # stalled server from blocking the caller forever
+        response = requests.get(url, timeout=timeout)
+
+        # turn HTTP error statuses (4xx/5xx) into a RequestException
+        response.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        print(e)
+        # an empty list lets callers iterate the result without a None check
+        return []
+
+    # parse the html content with BeautifulSoup
+    soup = BeautifulSoup(response.content, "html.parser")
+
+    # pinpoint the parser only to the section containing the course names and links
+    menu = soup.find("dl", {"class": "bs_menu"})
+    if menu is None:
+        # find() returns None when no element matches
+        return []
+
+    return menu.find_all("a", href=True)
+
+
+def scraping(site=None) -> dict:
+    """
+    Scrape the course overview and return a dictionary of the form {name: link}.
+
+    :param site: URL of the overview page to scrape; defaults to
+        https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/
+    :return: dict mapping each link's text to its absolute URL; empty when
+        no links could be fetched
+    """
+    # imported locally so this function does not rely on the file's import section
+    from urllib.parse import urljoin
+
+    if site is None:
+        site = "https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/"
+
+    # fetch_website may hand back None (e.g. after a failed request);
+    # treat that as "no links" instead of crashing in the loop below
+    links = fetch_website(site) or []
+
+    return {
+        # urljoin resolves the href against the page URL, so a site given
+        # without a trailing slash or ending in index.html still works
+        element.text: urljoin(site, element["href"])
+        for element in links
+        # filters out the link to the remaining-places search
+        # (Restplätze-Suche), which isn't a course itself
+        if element["href"] != "kurssuche.html#RP"
+    }
+
+
+if __name__ == "__main__":
+    # when run as a script: scrape the default site and print the resulting dict
+    scraped_courses = scraping()
+    print(scraped_courses)
--- a/unisportomat/course_scraper/test_course_scraper.py
+++ b/unisportomat/course_scraper/test_course_scraper.py
+from django.test import TestCase
+from course_scraper import fetch_website, scraping
+
+
+class ScraperTestCase(TestCase):
+    """Smoke tests for scraping(); each test calls it without arguments."""
+
+    def test_returns_dict(self):
+        """scraping() returns a dict."""
+        result = scraping()
+        self.assertIsInstance(result, dict)
+
+    def test_dict_not_empty(self):
+        """scraping() returns at least one entry."""
+        course_count = len(scraping())
+        self.assertGreater(course_count, 0)