From 739fcb5f93ebf1791a9a58a2903a3b35ad2fe0f0 Mon Sep 17 00:00:00 2001 From: dominip89 <dominip89@mi.fu-berlin.de> Date: Sun, 23 May 2021 17:55:33 +0000 Subject: [PATCH] Update course_scraper.py --- unisportomat/course_scraper/course_scraper.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/unisportomat/course_scraper/course_scraper.py b/unisportomat/course_scraper/course_scraper.py index 27ea1ce..2d51c7f 100644 --- a/unisportomat/course_scraper/course_scraper.py +++ b/unisportomat/course_scraper/course_scraper.py @@ -1,3 +1,8 @@ +""" +Implementation of a rudimentary scraping tool +for http://www.buchsys.de for SWP UniSport-O-Mat. +""" + import requests from bs4 import BeautifulSoup @@ -17,17 +22,21 @@ def fetch_website(url): # pinpoint the parser only to the section containing the course names and links return soup.find("dl", {"class": "bs_menu"}).find_all("a", href=True) - except requests.exceptions.RequestException as e: - print(e) + except requests.exceptions.RequestException as err: + print(err) + raise def scraping(site=None) -> dict: """ - Returns a dictionary of the form {name: link}, containing the scraped content of https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/index.html, unless another URL is given as an argument. + Returns a dictionary of the form {name: link}, + containing the scraped content of + https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/index.html, + unless another URL is given as an argument. """ courses = {} - if site == None: + if site is None: site = "https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/" website = fetch_website(site) -- GitLab