diff --git a/package.json b/package.json
index bed42fa834078282f1cb1954111d6bd5895115c8..87da4563764920c85953330fbae0699250fc9d11 100644
--- a/package.json
+++ b/package.json
@@ -21,6 +21,7 @@
     "@types/express": "^4.17.11",
     "@types/jest": "^26.0.22",
     "@types/node": "14.14.41",
+    "@types/node-fetch": "^2.5.10",
     "@typescript-eslint/eslint-plugin": "^4.22.0",
     "@typescript-eslint/parser": "^4.22.0",
     "eslint": "^7.25.0",
@@ -29,6 +30,8 @@
     "eslint-plugin-node": "^11.1.0",
     "eslint-plugin-promise": "^4.3.1",
     "jest": "^26.6.3",
+    "node-fetch": "^2.6.1",
+    "node-html-parser": "^3.3.0",
     "prettier": "2.2.1",
     "ts-jest": "^26.5.5",
     "ts-node": "^9.1.1",
diff --git a/src/parsingScript.ts b/src/parsingScript.ts
new file mode 100644
index 0000000000000000000000000000000000000000..0acc53b04c05d1cc3ef5f33428fa909f4eaf9a32
--- /dev/null
+++ b/src/parsingScript.ts
@@ -0,0 +1,46 @@
+import fetch from 'node-fetch'
+import * as parser from 'node-html-parser'
+
+const url =
+  'https://www.buchsys.de/fu-berlin/angebote/aktueller_zeitraum/index.html'
+
+/** One scraped anchor: `[text content, href attribute or undefined]`. */
+type LinkEntry = [string, string | undefined]
+
+/**
+ * Fetches the page at `url` and collects every `<a>` element nested in the
+ * first `<dl>` element of the returned HTML.
+ *
+ * Best-effort by design: a network failure, a non-2xx response, a body that
+ * cannot be read or a page without a `<dl>` all produce an empty array rather
+ * than a rejected promise, so callers never need a `.catch()`.
+ *
+ * @returns one `[text, href]` pair per anchor, in the order
+ *   `querySelectorAll` reports them; `href` is whatever `getAttribute('href')`
+ *   yields for that anchor.
+ */
+export async function parseFromBuchsys(): Promise<LinkEntry[]> {
+  try {
+    const response = await fetch(url)
+    // fetch() resolves for HTTP error statuses too, so an error page has to
+    // be rejected explicitly instead of being parsed like the real listing.
+    if (!response.ok) {
+      return []
+    }
+
+    const html = await response.text()
+    const list = parser.parse(html).querySelector('dl')
+    // Guard explicitly instead of relying on a TypeError being swallowed below.
+    if (!list) {
+      return []
+    }
+
+    return list
+      .querySelectorAll('a')
+      .map((anchor): LinkEntry => [anchor.text, anchor.getAttribute('href')])
+  } catch {
+    // Deliberate best-effort fallback: any failure degrades to "no links"
+    // instead of propagating to the caller.
+    return []
+  }
+}