# -*- coding: utf-8 -*-
"""Look up the current teaching week shown on the BME KTH website."""
import re

from lxml import html
from requests import get

_KTH_URL = 'https://www.kth.bme.hu/'
_INFOBAR_SELECTOR = '#site-container > div.infobar > div.line-2'
_REQUEST_TIMEOUT = 10  # seconds; without a timeout requests may block forever

# ``u''`` literals with escaped backslashes are valid on both Python 2 and
# Python 3 (the old ``ur''`` prefix is a syntax error on Python 3).
# re.UNICODE makes ``\s`` / ``\d`` behave the same on both versions.
_SEMESTER_RE = re.compile(u'.*félév\\s+(\\d+)\\.\\s+hete.*', re.UNICODE)
_SUMMER_RE = re.compile(u'.*nyár\\s+(\\d+)\\.\\s+hete.*', re.UNICODE)


def _parse_week(text):
    """Extract the week number from the info-bar text (see get_week)."""
    # ``.`` does not match newlines, so strip them (and tabs) before matching.
    text = text.replace('\n', '').replace('\t', '')

    found = _SEMESTER_RE.match(text)
    if found:
        # The page's number is reduced by one, as in the original code
        # (presumably to get a zero-based week index -- confirm with callers).
        return int(found.group(1)) - 1

    if _SUMMER_RE.match(text):
        raise EnvironmentError('Nyar van!')
    raise RuntimeError('Baj van a KTH oldalan!')


def get_week():
    """Return the week number shown on the KTH front page, minus one.

    Downloads the KTH front page, reads the second line of its info bar and
    looks for the semester ("félév ... N. hete") pattern.

    Returns:
        int: the number found on the page minus 1.

    Raises:
        EnvironmentError: the page shows the summer ("nyár") pattern instead.
        RuntimeError: the info bar is missing or matches neither pattern.
        requests.exceptions.RequestException: the download failed or timed
            out.
    """
    page = get(_KTH_URL, timeout=_REQUEST_TIMEOUT)
    domtree = html.fromstring(page.content)

    lines = domtree.cssselect(_INFOBAR_SELECTOR)
    if not lines:
        # A missing info bar is a page-layout problem; report it the same way
        # as unparseable text instead of leaking an IndexError.
        raise RuntimeError('Baj van a KTH oldalan!')

    return _parse_week(lines[0].text_content())


if __name__ == '__main__':
    print(get_week())