"""Software-Engineering Documentation

BeautifulSoup - a powerful library that allows scraping data from websites
using Python.
"""

# Intro
import urllib.request as urllib2

from bs4 import BeautifulSoup

# loading website; the `with` block closes the HTTP response as soon as the
# body has been read, so the connection is not left open
with urllib2.urlopen('https://www.htlkrems.ac.at') as response:
    html_doc = response.read()

# represents full HTML-document
soup = BeautifulSoup(html_doc, 'html.parser')

# formatted HTML-document structure
strhtm = soup.prettify()

# Output: only the first 1000 characters of the prettified document
print(strhtm[:1000])

# Element data
# get element-data: the <title> tag itself, then just its text
print(soup.title)
print(soup.title.string)
# expected output:
# <title>HTL Krems</title>
# HTL Krems

# .find_all()
# collect every anchor-tag once and reuse the result for both examples below
links = soup.find_all("a")

# text from every anchor-tag on website
for tag in links:
    print(tag.text)

# count all hyperlinks of the website
print(len(links))