# -*- coding: utf-8 -*-
"""Read the current semester week from the BME KTH home page.

The module is written so that it parses and runs on both Python 2 and
Python 3.
"""
import re

from lxml import html
from requests import get

_KTH_URL = 'https://www.kth.bme.hu/'
_INFOBAR_SELECTOR = '#site-container > div.infobar > div.line-2'

# Seconds to wait for the server; without a timeout requests may block forever.
_REQUEST_TIMEOUT = 10

# The accented keyword is a u'' literal and the regex tail a raw literal, so
# the patterns are valid on Python 2 and 3 alike (the ur'' prefix is a
# SyntaxError on Python 3).  re.UNICODE makes \s and \d behave the same on
# both interpreters.
_SEMESTER_WEEK_RE = re.compile(u'félév' r'\s+(\d+)\.\s+hete', re.UNICODE)
_SUMMER_WEEK_RE = re.compile(u'nyár' r'\s+(\d+)\.\s+hete', re.UNICODE)

_PAGE_ERROR = 'Baj van a KTH oldalan!'


def get_week():
    """Return the zero-based index of the current semester week.

    Downloads the KTH home page, takes the text of the second info bar line
    and looks for the "<semester> N. hete" phrase in it.

    Returns:
        int: the week number shown on the page, minus one.

    Raises:
        EnvironmentError: the info bar shows a summer week instead of a
            semester week.
        RuntimeError: the info bar element is missing, or its text matches
            neither the semester nor the summer pattern.

    Exceptions raised by ``requests.get`` (connection errors, timeouts)
    propagate unchanged.
    """
    page = get(_KTH_URL, timeout=_REQUEST_TIMEOUT)
    domtree = html.fromstring(page.content)

    infobar_lines = domtree.cssselect(_INFOBAR_SELECTOR)
    if not infobar_lines:
        # A missing info bar means the page layout changed; report it the
        # same way as unparseable text instead of leaking an IndexError.
        raise RuntimeError(_PAGE_ERROR)

    # Newlines and tabs are removed so the phrase is matched on a single line.
    text = infobar_lines[0].text_content().replace('\n', '').replace('\t', '')

    semester = _SEMESTER_WEEK_RE.search(text)
    if semester:
        # The page counts weeks from 1; callers get a zero-based index.
        return int(semester.group(1)) - 1

    if _SUMMER_WEEK_RE.search(text):
        raise EnvironmentError('Nyar van!')

    raise RuntimeError(_PAGE_ERROR)


if __name__ == '__main__':
    print(get_week())