Fix error caused by unexpected links in HAFAS' connection output, improve date handling, add exception raiser to threads

This commit is contained in:
Klaus-Uwe Mitterer 2017-10-26 21:21:41 +02:00
parent d4dd2ba0e4
commit 92e69a14f4

View file

@ -4,6 +4,7 @@ import datetime
import pytz import pytz
import threading import threading
import queue import queue
import sys
import workers.val import workers.val
from classes import * from classes import *
@ -11,85 +12,101 @@ from classes import *
def getStation(name):
    """Return the first result that workers.val.validateName yields for name.

    Raises IndexError when validateName yields nothing.
    """
    matches = list(workers.val.validateName(name))
    return matches[0]
def _spanText(node):
    """Return the stripped text of node's first nested <span>, or of node itself.

    Returns None when the stripped text is empty.
    """
    return (node.find("span") or node).string.strip() or None


def getService(sid, lines, q, eq = None):
    """Parse one service from its table rows and put ``(sid, Service)`` on q.

    Intended to run as a thread target (see getDetails).

    Parameters:
        sid:   key stored alongside the service; getDetails passes the row
               offset and reads results from a PriorityQueue, so it also
               determines the order in which services are consumed.
        lines: sequence of at least three parsed rows. lines[0] is read as the
               departure row and lines[1] as the arrival row; lines[2] is
               fetched but not used.
        q:     queue receiving the ``(sid, svc)`` tuple on success.
        eq:    optional queue. On failure the ``sys.exc_info()`` triple is put
               here so the spawning thread can re-raise it.

    Raises:
        Any exception from parsing or fetching, but only when eq is None;
        otherwise the exception is reported through eq and nothing is raised.
    """
    try:
        dep = lines[0]
        arr = lines[1]
        det = lines[2]  # unused; kept so a short row group still fails here

        depst = list(workers.val.validateName(dep.find("td", { "class": "station" }).findAll("a")[0].string))[0]
        depdate = dep.find("td", { "class": "date" }).string.strip() or None
        deptime = dep.find("td", { "class": "timeValue" }).find("span").string.split()[1].strip()
        depprog = _spanText(dep.find("span", { "class": "prognosis" }))
        depplat = _spanText(dep.find("td", { "class": "platform" }))

        product = dep.find("img", { "class": "product" })
        walk = product.get("src") == "/img/vs_oebb/fuss_pic.gif"
        name = "Walk" if walk else product.get("alt")

        arrst = list(workers.val.validateName(arr.find("td", { "class": "station" }).findAll("a")[0].string))[0]
        # Arrival date falls back to the departure date when the cell is empty.
        arrdate = _spanText(arr.find("td", { "class": "date" })) or depdate
        arrtime = arr.find("td", { "class": "timeValue" }).find("span").string.split()[1].strip()
        arrprog = _spanText(arr.find("span", { "class": "prognosis" }))
        arrplat = _spanText(arr.find("td", { "class": "platform" }))

        if arrdate and not depdate:
            # Only the arrival row carries a date: take the departure to be
            # the day before it.
            arrdts = datetime.datetime.strptime(arrdate, "%d.%m.%Y")
            depdts = arrdts - datetime.timedelta(days=1)
            depdate = datetime.datetime.strftime(depdts, "%d.%m.%Y")

        dest = None

        if not (walk or depdate):
            # No date on either row: read it, and the destination, from the
            # page linked in the product cell.
            purl = dep.find("td", { "class": "product" }).find("a").get("href")
            psource = requests.get(purl).text

            zuppa = BeautifulSoup(psource, "html5lib")
            blocks = zuppa.findAll("div", { "class": "block" })
            depdate = blocks[1].text.strip()
            arrdate = depdate
            dest = list(workers.val.validateName(blocks[2].text.split(":")[1].strip()))[0]

        elif not depdate:
            # Walk without any date information: use a fixed placeholder date
            # so the timestamps below can still be parsed.
            depdate = "01.01.2000"
            arrdate = depdate

        depts = datetime.datetime.strptime("%s %s" % (depdate, deptime), "%d.%m.%Y %H:%M")
        arrts = datetime.datetime.strptime("%s %s" % (arrdate, arrtime), "%d.%m.%Y %H:%M")

        # "pünktlich" (on time) is replaced by the scheduled time itself.
        depprog = deptime if depprog == "pünktlich" else depprog
        arrprog = arrtime if arrprog == "pünktlich" else arrprog

        svc = Service(name, depst, depts, arrst, arrts, dest, depplat, depprog, arrplat, arrprog)
        q.put((sid, svc))

    except Exception:
        if eq is None:
            # Nobody is listening for errors: do not swallow the failure.
            raise
        eq.put(sys.exc_info())
def getDetails(cid, url, q, via = None, eq = None):
    """Fetch url, build the Connection for row ``trC0-<cid>`` and put it on q.

    Intended to run as a thread target. The detail rows of the connection are
    split into groups of three and each group is parsed by getService in its
    own thread; the resulting services are added to the connection in the
    order of their row offset.

    Parameters:
        cid: integer index of the connection; used to locate the
             ``<tr id="trC0-<cid>">`` element and as the key put on q.
        url: address of the page to download.
        q:   queue receiving ``(cid, conn)`` on success. Nothing is put when
             the row is not found on the page.
        via: optional iterable of via stations added to the connection.
        eq:  optional queue. On failure the ``sys.exc_info()`` triple is put
             here (also by the getService threads) so the caller can re-raise.

    Raises:
        Any exception from fetching or parsing, but only when eq is None;
        otherwise the exception is reported through eq and nothing is raised.
    """
    try:
        ssource = requests.get(url).text
        suppe = BeautifulSoup(ssource, "html5lib")

        cont = suppe.find("tr", id="trC0-%i" % cid)
        if not cont:
            return

        # Ticket URL extraction is currently disabled:
        # buyurl = None
        #
        # for url in cont.findAll("a"):
        #     if url.get("href") and "https://tickets.oebb.at/de/ticket/ticket?" in url.get("href"):
        #         buyurl = url.get("href")

        conn = Connection(True)

        for vst in via or ():
            conn.addVia(vst)

        # The first "tpDetails" row is skipped; the rest come in groups of three.
        lines = cont.findAll("tr", { "class": "tpDetails" })[1:]
        threads = []
        iq = queue.PriorityQueue()

        for line in range(0, len(lines), 3):
            t = threading.Thread(target=getService, args=(line, lines[line:line + 3], iq, eq))
            t.start()
            threads.append(t)

        for t in threads:
            t.join()

        # The PriorityQueue returns the services ordered by their row offset.
        while not iq.empty():
            conn.addService(iq.get()[1])

        q.put((cid, conn))

    except Exception:
        if eq is None:
            # Nobody is listening for errors: do not swallow the failure.
            raise
        eq.put(sys.exc_info())
def connRequest(frm, to, count = 3, time = datetime.datetime.now(), mode = False, details = False, via = []): def connRequest(frm, to, count = 3, time = datetime.datetime.now(), mode = False, details = False, via = []):
outdate = datetime.datetime.strftime(time, "%d.%m.%Y") outdate = datetime.datetime.strftime(time, "%d.%m.%Y")
@ -111,20 +128,25 @@ def connRequest(frm, to, count = 3, time = datetime.datetime.now(), mode = False
conns = [] conns = []
for a in juha.findAll("a"): for a in juha.findAll("a"):
if a.get("href") and "HWAI=CONNECTION$" in a.get("href"): if a.get("href") and "GO_conViewMode" in a.get("href"):
conns += [a.get("href")] conns += [a.get("href")]
threads = [] threads = []
eq = queue.Queue()
q = queue.PriorityQueue() q = queue.PriorityQueue()
for i in range(len(conns[:-1])): for i in range(len(conns)):
t = threading.Thread(target=getDetails, args=(i, conns[i], q, via)) t = threading.Thread(target=getDetails, args=(i, conns[i], q, via, eq))
t.start() t.start()
threads += [t] threads += [t]
for t in threads: for t in threads:
t.join() t.join()
if not eq.empty():
exc = eq.get()
raise exc[1].with_traceback(exc[2])
while not q.empty(): while not q.empty():
yield q.get()[1] yield q.get()[1]