Skip to content
Snippets Groups Projects
Commit 1171a22e authored by yklingele's avatar yklingele
Browse files

Upload new file

parent 1135aaee
Branches
No related tags found
No related merge requests found
# -*- coding: cp1252 -*-
# imports
from bs4 import BeautifulSoup
import requests
import csv
import collections
import itertools
def _scrape_headlines(csv_path, page_count):
    """Download the heise.de topic pages and store their headlines as CSV.

    One row is written per 'keywordliste' div; each field of the row is the
    text of one <header> element inside that div.

    Args:
        csv_path: Path of the CSV file to (over)write.
        page_count: Number of result pages to fetch, starting at page 0.
    """
    # The with-block guarantees the rows are flushed and the file is closed
    # before anything tries to read it back.
    with open(csv_path, 'w') as handle:
        writer = csv.writer(handle, delimiter=';')
        for page in range(page_count):
            heise_url = "https://www.heise.de/thema/https?seite=" + str(page) + ".html"
            # A timeout keeps a stalled connection from hanging the script.
            data = requests.get(heise_url, timeout=30).text
            content = BeautifulSoup(data, "lxml")
            for keyword_list in content.find_all('div', class_='keywordliste'):
                # The first <header> of every list is skipped (presumably it
                # is the list title, not an article - verify against the
                # page). Slicing is safe even when no header was found.
                headers = keyword_list.find_all('header')[1:]
                row = []
                for header in headers:
                    text = header.text
                    # Python 2 hands back `unicode`, which the csv module
                    # cannot write; Python 3 hands back `str`, which must
                    # not be turned into bytes.
                    if not isinstance(text, str):
                        text = text.encode('utf-8')
                    row.append(text.rstrip())  # drop the trailing newline
                writer.writerow(row)


def _top_words(csv_path, limit):
    """Return the most frequent words of the CSV file as (count, word) tuples.

    The file content is split on whitespace and on the ';' delimiter. Empty
    fragments are ignored.

    Args:
        csv_path: Path of the CSV file to analyse.
        limit: Maximum number of entries to return.

    Returns:
        A list of at most `limit` (count, word) tuples, highest count first;
        ties are ordered by word, descending.
    """
    with open(csv_path, 'r') as handle:
        tokens = handle.read().split()
    fragments = itertools.chain.from_iterable(
        token.split(';') for token in tokens
    )
    # Counter tallies in a single pass instead of one list.count() per word.
    counts = collections.Counter(word for word in fragments if word)
    ranked = sorted(((count, word) for word, count in counts.items()),
                    reverse=True)
    return ranked[:limit]


def main():
    """Scrape the headlines of heise.de/thema/https and print the top 3 words.

    The headlines of the first four topic pages are written to
    'yannis_julius_heisescraper.csv'; afterwards the words in that file are
    counted and the three most frequent ones are printed as (count, word).
    Fewer entries are printed when fewer than three distinct words exist.
    """
    csv_path = 'yannis_julius_heisescraper.csv'

    print("Scraping from heise.de/thema/https\n")
    _scrape_headlines(csv_path, 4)
    print("Finished scraping from heise.de/thema/https\n")

    print("Continuing with counting word frequency\n")
    top = _top_words(csv_path, 3)

    # Single-argument print() calls behave the same on Python 2 and 3.
    ranking = " ".join(
        "\n %d. %s" % (rank, str(entry)) for rank, entry in enumerate(top, 1)
    )
    print("Results: " + ranking)
# Entry point: start the scraper only when this file is run as a script.
if __name__ == "__main__":
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment