Skip to content
Snippets Groups Projects
Commit 1171a22e authored by yklingele's avatar yklingele
Browse files

Upload new file

parent 1135aaee
No related branches found
No related tags found
No related merge requests found
# -*- coding: cp1252 -*-
# imports
import collections
import csv
import itertools
import sys

import requests
from bs4 import BeautifulSoup
def _open_csv(path, mode):
    """Open *path* the way the running interpreter's csv module expects.

    mode is 'r' or 'w'. Python 2's csv module works on byte strings and
    wants a binary file; Python 3's wants a text file opened with
    newline='' so the module controls line endings itself.
    """
    if sys.version_info[0] < 3:
        return open(path, mode + 'b')
    return open(path, mode, newline='', encoding='utf-8')


def _header_rows(pages):
    """Yield one list of header texts per 'keywordliste' div on each page.

    pages is an iterable of page numbers that are inserted into the
    heise.de/thema/https URL. Each yielded list holds the right-stripped
    text of the <header> elements found in one div, without the first one.
    """
    is_py2 = sys.version_info[0] < 3
    for page in pages:
        heise_url = "https://www.heise.de/thema/https?seite="+str(page)+".html"
        # A timeout keeps the script from hanging forever on a stalled server.
        data = requests.get(heise_url, timeout=30).text
        content = BeautifulSoup(data, "lxml")
        for block in content.findAll('div', class_='keywordliste'):
            # The first <header> of every block is skipped, as the original
            # script did (presumably it is not an article header -- verify
            # against the page markup). Slicing is safe for empty blocks.
            row = []
            for header in block.findAll('header')[1:]:
                text = header.text
                if is_py2:
                    # Python 2's csv writer cannot handle unicode objects.
                    text = text.encode('utf-8')
                row.append(text.rstrip())  # drop trailing whitespace/newlines
            yield row


def _top_words(path, limit):
    """Return up to *limit* (count, word) tuples, most frequent word first.

    The file is parsed with csv.reader so that quoting added by the writer
    is undone before the fields are split into whitespace-separated words.
    Ties are ordered by word, descending, because whole (count, word)
    tuples are compared.
    """
    counts = collections.Counter()
    with _open_csv(path, 'r') as source:
        for row in csv.reader(source, delimiter=';'):
            for field in row:
                counts.update(field.split())
    ranked = sorted(((count, word) for word, count in counts.items()),
                    reverse=True)
    return ranked[:limit]


def main():
    """Scrape heise.de/thema/https and print the three most frequent words.

    Pages 0 to 3 are fetched; for every 'keywordliste' div one row of
    header texts is written to 'yannis_julius_heisescraper.csv' (delimiter
    ';'). The file is then read back, split into words, and the most
    frequent words are printed as (count, word) tuples. Fewer than three
    lines are printed when fewer distinct words were found.
    """
    csv_path = 'yannis_julius_heisescraper.csv'

    print("Scraping from heise.de/thema/https\n")
    # The with-block closes (and thereby flushes) the file before it is
    # read again below; otherwise buffered rows could be missing.
    with _open_csv(csv_path, 'w') as target:
        csvfile = csv.writer(target, delimiter=';')
        for row in _header_rows(range(0, 4, 1)):
            csvfile.writerow(row)
    print("Finished scraping from heise.de/thema/https\n")

    print("Continuing with counting word frequency\n")
    top = _top_words(csv_path, 3)
    # Joined with " \n" to reproduce the layout of the former print statement:
    # "Results: \n 1. (n, 'word') \n 2. ... \n 3. ..."
    report = ["Results:"]
    report.extend(" %d. %s" % (rank, entry)
                  for rank, entry in enumerate(top, 1))
    print(" \n".join(report))
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment