Skip to content
Snippets Groups Projects
Commit 739fcb5f authored by dominip89's avatar dominip89
Browse files

Update course_scraper.py

parent 48a37c22
No related branches found
No related tags found
No related merge requests found
"""
Implementation of a rudimentary scraping tool
for http://www.buchsys.de for SWP UniSport-O-Mat.
"""
import requests
from bs4 import BeautifulSoup
......@@ -17,17 +22,21 @@ def fetch_website(url):
# pinpoint the parser only to the section containing the course names and links
return soup.find("dl", {"class": "bs_menu"}).find_all("a", href=True)
except requests.exceptions.RequestException as e:
print(e)
except requests.exceptions.RequestException as err:
print(err)
raise
def scraping(site=None) -> dict:
"""
Returns a dictionary of the form {name: link}, containing the scraped content of https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/index.html, unless another URL is given as an argument.
Returns a dictionary of the form {name: link},
containing the scraped content of
https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/index.html,
unless another URL is given as an argument.
"""
courses = {}
if site == None:
if site is None:
site = "https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/"
website = fetch_website(site)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment