added get_week.py to handle networking

This commit is contained in:
Kjistóf 2016-11-30 10:38:35 +01:00
parent 3045456ed5
commit 9eb80cf402
1 changed file with 28 additions and 0 deletions

28
get_week.py Normal file
View File

@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
from requests import get
from lxml import html
import re
def _week_from_infobar(text):
    """Extract the zero-based semester week from the info-bar text.

    Newlines and tabs are removed first so the patterns can match text that
    was spread over several lines in the page markup.

    Args:
        text: Text content of the info-bar line taken from the page.

    Returns:
        int: The number found between "félév" and "hete", minus one.

    Raises:
        EnvironmentError: The text names a "nyár" week instead of a
            "félév" week (message 'Nyar van!').
        RuntimeError: Neither pattern matches the text.
    """
    flat = text.replace('\n', '').replace('\t', '')
    # u'' literals with doubled backslashes replace the ur'' form: the ur
    # prefix is a SyntaxError on Python 3, while this spelling produces the
    # very same pattern on both Python 2 and Python 3.
    semester = re.match(u'.*félév\\s+(\\d+)\\.\\s+hete.*', flat)
    if semester:
        return int(semester.group(1)) - 1
    if re.match(u'.*nyár\\s+(\\d+)\\.\\s+hete.*', flat):
        raise EnvironmentError('Nyar van!')
    raise RuntimeError('Baj van a KTH oldalan!')


def get_week():
    """Fetch https://www.kth.bme.hu/ and return the current semester week.

    The week is read from the second line of the page's info bar
    (``#site-container > div.infobar > div.line-2``).

    Returns:
        int: The week number shown on the page, minus one.

    Raises:
        EnvironmentError: The info bar reports a "nyár" week
            (message 'Nyar van!').
        RuntimeError: The info bar is missing or its text matches neither
            expected pattern.

    Errors raised by the HTTP request itself (connection failure, timeout)
    are not caught here and propagate to the caller.
    """
    # Without an explicit timeout the request can block forever on an
    # unresponsive server.
    page = get('https://www.kth.bme.hu/', timeout=30)
    domtree = html.fromstring(page.content)
    lines = domtree.cssselect('#site-container > div.infobar > div.line-2')
    if not lines:
        # A missing info bar means the page no longer has the expected
        # layout; report it like any other unparseable page instead of
        # leaking an IndexError from the [0] lookup.
        raise RuntimeError('Baj van a KTH oldalan!')
    return _week_from_infobar(lines[0].text_content())
if __name__ == '__main__':
    # Run as a script: look up the current week once and print it.
    current_week = get_week()
    print(current_week)