From e9cbd328e949a837b30f8752ebc80d97c62e1dea Mon Sep 17 00:00:00 2001
From: carlos93 <carlos93@mi.fu-berlin.de>
Date: Thu, 20 May 2021 18:14:58 +0200
Subject: [PATCH] Added parsingScript

-Added parsing Script
---
 package.json         |  3 +++
 src/parsingScript.ts | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 src/parsingScript.ts

diff --git a/package.json b/package.json
index bed42fa..87da456 100644
--- a/package.json
+++ b/package.json
@@ -21,6 +21,7 @@
     "@types/express": "^4.17.11",
     "@types/jest": "^26.0.22",
     "@types/node": "14.14.41",
+    "@types/node-fetch": "^2.5.10",
     "@typescript-eslint/eslint-plugin": "^4.22.0",
     "@typescript-eslint/parser": "^4.22.0",
     "eslint": "^7.25.0",
@@ -29,6 +30,8 @@
     "eslint-plugin-node": "^11.1.0",
     "eslint-plugin-promise": "^4.3.1",
     "jest": "^26.6.3",
+    "node-fetch": "^2.6.1",
+    "node-html-parser": "^3.3.0",
     "prettier": "2.2.1",
     "ts-jest": "^26.5.5",
     "ts-node": "^9.1.1",
diff --git a/src/parsingScript.ts b/src/parsingScript.ts
new file mode 100644
index 0000000..0acc53b
--- /dev/null
+++ b/src/parsingScript.ts
@@ -0,0 +1,42 @@
+import fetch from 'node-fetch'
+import * as parser from 'node-html-parser'
+
+const url =
+  'https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/index.html'
+
+/**
+ * Fetches the Buchsys index page at `url` and collects every link found
+ * inside the page's first <dl> element.
+ *
+ * Best-effort scraper: a network failure, a non-2xx response, or a page
+ * without a <dl> element resolves to an empty array instead of rejecting.
+ *
+ * @returns pairs of [link text, href]; href is undefined when the anchor
+ *   has no href attribute.
+ */
+export async function parseFromBuchsys(): Promise<
+  [string, string | undefined][]
+> {
+  try {
+    const response = await fetch(url)
+    if (!response.ok) {
+      // An HTTP error page is not the listing, so do not scrape it.
+      return []
+    }
+
+    const list = parser.parse(await response.text()).querySelector('dl')
+    if (!list) {
+      return []
+    }
+
+    return list
+      .querySelectorAll('a')
+      .map<[string, string | undefined]>((link) => [
+        link.text,
+        link.getAttribute('href'),
+      ])
+  } catch {
+    // Keep the original contract: any failure yields an empty list.
+    return []
+  }
+}
-- 
GitLab