diff --git a/unisportomat/course_scraper/course_scraper.py b/unisportomat/course_scraper/course_scraper.py index 27ea1ce5d033d773a6725112d88e9b0d042b899c..2d51c7f6d5ce663aa4785f246cee688924e4c902 100644 --- a/unisportomat/course_scraper/course_scraper.py +++ b/unisportomat/course_scraper/course_scraper.py @@ -1,3 +1,8 @@ +""" +Implementation of a rudimentary scraping tool +for http://www.buchsys.de for SWP UniSport-O-Mat. +""" + import requests from bs4 import BeautifulSoup @@ -17,17 +22,21 @@ def fetch_website(url): # pinpoint the parser only to the section containing the course names and links return soup.find("dl", {"class": "bs_menu"}).find_all("a", href=True) - except requests.exceptions.RequestException as e: - print(e) + except requests.exceptions.RequestException as err: + print(err) + raise def scraping(site=None) -> dict: """ - Returns a dictionary of the form {name: link}, containing the scraped content of https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/index.html, unless another URL is given as an argument. + Returns a dictionary of the form {name: link}, + containing the scraped content of + https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/index.html, + unless another URL is given as an argument. """ courses = {} - if site == None: + if site is None: site = "https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/" website = fetch_website(site)