Use threading to improve response times for connection detail requests, set default connection count to 6 again as it doesn't matter anymore, update README.

This commit is contained in:
Klaus-Uwe Mitterer 2017-10-25 23:07:36 +02:00
parent 883cb9ee7c
commit d4dd2ba0e4
4 changed files with 121 additions and 87 deletions

View file

@ -67,7 +67,7 @@ The following parameters are required for this type of request:
The following optional parameters are accepted:
* "count": Number of connections to return (default: 1)
* "count": Number of connections to return (default: 6)
* "date": Date on which to find a connection (e.g. 31.12.2017, default: today)
* "time": Time at which to find a connection (e.g. 20:00, default: current time)
* "mode": May be set to "arr" if "time" should be considered time of arrival
@ -88,17 +88,16 @@ By default, the response will not include some information, including at which
stations a change to a different service is required, if applicable. It will,
however, return the total number of changes. If additional information is
required, it may be requested by adding the "details" parameter to the request.
Note that this request is going to take considerably longer to process. If at
all possible, do not set "count" higher than 1 when requesting details as too
many connections may cause the query to time-out.
Note that this request is going to take about two to three times as long to
process.
* [Example "conn" request without details]
(https://bahnapi.xyz/?type=conn&from=Strobl&to=Wien)
(https://bahnapi.xyz/?type=conn&from=Koppl&to=Wien)
* [Same request with details]
(https://bahnapi.xyz/?type=conn&from=Strobl&to=Wien&details)
(https://bahnapi.xyz/?type=conn&from=Koppl&to=Wien&details)
If "count" is not set, one connection will be returned. This value may safely be
increased up to 6 if "details" is not set.
If "count" is not set, six connections will be returned, which is the maximum
retrievable at once.
* [Example "conn" request for five connections]
(https://bahnapi.xyz/?type=conn&from=Thörl&to=Pama&count=5)

View file

@ -66,7 +66,7 @@ class Connection:
def xml(self, indent = 0, cid = False, frm = True, to = True, deptime = True, arrtime = True, duration = True, changes = True, services = True, via = True, servicekwargs = {}, stationkwargs = {}):
out = " " * indent + "<connection%s>\n" % ("" if cid is False else " id=\"%i\"" % cid)
if frm:
if frm and self.depst():
out += " " * indent + " <from>\n"
out += self.depst().xml(indent + 2, **stationkwargs) + "\n"
out += " " * indent + " </from>\n"
@ -79,32 +79,32 @@ class Connection:
out += " " * indent + " </via>\n"
if to:
if to and self.arrst():
out += " " * indent + " <to>\n"
out += self.arrst().xml(indent + 2, **stationkwargs) + "\n"
out += " " * indent + " </to>\n"
if deptime or arrtime or duration or changes:
if (deptime and self.deptime()) or (arrtime and self.arrtime()) or (duration and self.duration()) or (changes and self.changes()):
out += " " * indent + " <details>\n"
if deptime:
if deptime and self.deptime():
out += " " * indent + " <departure>\n"
out += " " * indent + " <date>%s</date>\n" % datetime.datetime.strftime(self.deptime(), "%d.%m.%Y")
out += " " * indent + " <time>%s</time>\n" % datetime.datetime.strftime(self.deptime(), "%H:%M")
out += " " * indent + " </departure>\n"
if arrtime:
if arrtime and self.arrtime():
out += " " * indent + " <arrival>\n"
out += " " * indent + " <date>%s</date>\n" % datetime.datetime.strftime(self.arrtime(), "%d.%m.%Y")
out += " " * indent + " <time>%s</time>\n" % datetime.datetime.strftime(self.arrtime(), "%H:%M")
out += " " * indent + " </arrival>\n"
out += (" " * indent + " <duration>%s</duration>\n" % self.durationString()) if duration else ""
out += (" " * indent + " <changes>%i</changes>\n" % self.changes()) if changes else ""
out += (" " * indent + " <duration>%s</duration>\n" % self.durationString()) if duration and self.duration() else ""
out += (" " * indent + " <changes>%i</changes>\n" % self.changes()) if changes and self.changes() else ""
out += " " * indent + " </details>\n"
if services:
if services and self.services:
out += " " * indent + " <services>\n"
for i in range(len(self.services)):
@ -121,7 +121,7 @@ class Connection:
out += (" " * indent + " \"@id\": %i,\n" % cid) if cid is not False else ""
if frm:
if frm and self.depst():
out += " " * indent + " \"from\":\n"
out += self.depst().json(indent + 2, **stationkwargs) + ",\n"
@ -134,35 +134,35 @@ class Connection:
out = "".join(out.rsplit(",", 1))
out += " " * indent + " ],\n"
if to:
if to and self.arrst():
out += " " * indent + " \"to\":\n"
out += self.arrst().json(indent + 2, **stationkwargs) + ",\n"
if deptime or arrtime or duration or changes:
if (deptime and self.deptime()) or (arrtime and self.arrtime()) or (duration and self.duration()) or (changes and self.changes()):
det = ""
det += " " * indent + " \"details\": {\n"
if deptime:
if deptime and self.deptime():
det += " " * indent + " \"departure\": {\n"
det += " " * indent + " \"date\": \"%s\",\n" % datetime.datetime.strftime(self.deptime(), "%d.%m.%Y")
det += " " * indent + " \"time\": \"%s\"\n" % datetime.datetime.strftime(self.deptime(), "%H:%M")
det += " " * indent + " },\n"
if arrtime:
if arrtime and self.arrtime():
det += " " * indent + " \"arrival\": {\n"
det += " " * indent + " \"date\": \"%s\",\n" % datetime.datetime.strftime(self.arrtime(), "%d.%m.%Y")
det += " " * indent + " \"time\": \"%s\"\n"% datetime.datetime.strftime(self.arrtime(), "%H:%M")
det += " " * indent + " },\n"
det += (" " * indent + " \"duration\": \"%s\",\n" % self.durationString()) if duration else ""
det += (" " * indent + " \"changes\": %i,\n" % self.changes()) if changes else ""
det += (" " * indent + " \"duration\": \"%s\",\n" % self.durationString()) if duration and self.duration() else ""
det += (" " * indent + " \"changes\": %i,\n" % self.changes()) if changes and self.changes() else ""
det = "".join(det.rsplit(",", 1))
out += det
out += " " * indent + " },\n"
if services:
if services and self.services:
out += " " * indent + " \"services\": [\n"
for i in range(len(self.services)):

View file

@ -70,7 +70,7 @@ def application(env, re):
yield "\"from\" and \"to\" values are required for this type of request.".encode()
return
count = args["count"][0] if "count" in args and args["count"] else 1
count = args["count"][0] if "count" in args and args["count"] else 6
date = args["date"][0] if "date" in args and args["date"] else datetime.datetime.strftime(datetime.datetime.now(pytz.timezone("Europe/Vienna")),"%d.%m.%Y")
time = args["time"][0] if "time" in args and args["time"] else datetime.datetime.strftime(datetime.datetime.now(pytz.timezone("Europe/Vienna")),"%H:%M")
mode = True if "mode" in args and args["mode"] and args["mode"][0].lower() == "arr" else False

View file

@ -2,6 +2,8 @@ from bs4 import BeautifulSoup
import requests
import datetime
import pytz
import threading
import queue
import workers.val
from classes import *
@ -9,9 +11,89 @@ from classes import *
def getStation(name):
return list(workers.val.validateName(name))[0]
def _cellText(parent, tag, cls):
    """Return the stripped text of a cell, or None when that text is empty.

    Looks up the first `tag` element carrying the CSS class `cls` below
    `parent`; a nested <span> is preferred over the element itself.
    """
    cell = parent.find(tag, { "class": cls })
    holder = cell.find("span") or cell
    return holder.string.strip() or None

def _clockTime(row):
    """Return the second whitespace-separated token of the row's "timeValue" span."""
    return row.find("td", { "class": "timeValue" }).find("span").string.split()[1].strip()

def _stationOf(row):
    """Return the first result of validateName() for the first link in the row's "station" cell."""
    label = row.find("td", { "class": "station" }).findAll("a")[0].string
    return list(workers.val.validateName(label))[0]

def getService(sid, lines, q):
    """Parse one service (leg) of a connection and put it on a queue.

    sid   -- key stored next to the service; the result is queued as (sid, service)
    lines -- three consecutive "tpDetails" rows: departure row, arrival row and
             a third row that is accessed but not evaluated
    q     -- queue receiving the (sid, Service) tuple

    Nothing is returned; the parsed Service is only delivered through `q`.
    """
    dayfmt = "%d.%m.%Y"
    stampfmt = "%d.%m.%Y %H:%M"

    # The third row is not used, but indexing it keeps the failure on short groups.
    dep, arr, _unused = lines[0], lines[1], lines[2]

    depst = _stationOf(dep)
    depdate = dep.find("td", { "class": "date" }).string.strip() or None
    deptime = _clockTime(dep)
    depprog = _cellText(dep, "span", "prognosis")
    depplat = _cellText(dep, "td", "platform")

    product = dep.find("img", { "class": "product" })
    walk = product.get("src") == "/img/vs_oebb/fuss_pic.gif"
    name = "Walk" if walk else product.get("alt")

    arrst = _stationOf(arr)
    arrdate = _cellText(arr, "td", "date") or depdate
    arrtime = _clockTime(arr)
    arrprog = _cellText(arr, "span", "prognosis")
    arrplat = _cellText(arr, "td", "platform")

    # Only an arrival date is present: the departure is dated one day earlier.
    if arrdate and not depdate:
        daybefore = datetime.datetime.strptime(arrdate, dayfmt) - datetime.timedelta(days=1)
        depdate = daybefore.strftime(dayfmt)

    # The linked product page is skipped only for walks with a known date.
    blocks = None
    if not walk or not depdate:
        purl = dep.find("td", { "class": "product" }).find("a").get("href")
        zuppa = BeautifulSoup(requests.get(purl).text, "html5lib")
        blocks = zuppa.findAll("div", { "class": "block" })
        depdate = blocks[1].text.strip()
        arrdate = arrdate or depdate

    if walk:
        dest = None
    else:
        # Third "block" div of the product page, text after the first colon.
        dest = list(workers.val.validateName(blocks[2].text.split(":")[1].strip()))[0]

    depts = datetime.datetime.strptime("%s %s" % (depdate, deptime), stampfmt)
    arrts = datetime.datetime.strptime("%s %s" % (arrdate, arrtime), stampfmt)

    # A "pünktlich" prognosis is replaced by the time parsed from the row.
    if depprog == "pünktlich":
        depprog = deptime
    if arrprog == "pünktlich":
        arrprog = arrtime

    q.put((sid, Service(name, depst, depts, arrst, arrts, dest, depplat, depprog, arrplat, arrprog)))
def getDetails(cid, url, q, via = None):
    """Fetch the detail page of one connection and put the parsed result on a queue.

    cid -- index of the connection; selects the table row with id "trC0-<cid>"
           and is stored next to the result, which is queued as (cid, connection)
    url -- address of the detail page to download
    q   -- queue receiving the (cid, Connection) tuple
    via -- optional iterable of via stations added to the connection
           (default: none)

    Nothing is returned. If the page has no row for `cid`, nothing is queued.
    One thread per service runs getService(); services are then added in
    ascending order of their row offset, as the priority queue hands out the
    smallest key first.
    """
    ssource = requests.get(url).text
    suppe = BeautifulSoup(ssource, "html5lib")

    cont = suppe.find("tr", id="trC0-%i" % cid)
    if not cont:
        return

    # buyurl = None
    #
    # for url in cont.findAll("a"):
    #     if url.get("href") and "https://tickets.oebb.at/de/ticket/ticket?" in url.get("href"):
    #         buyurl = url.get("href")

    conn = Connection(True)

    # A None default avoids a list object shared between calls.
    for vst in via or ():
        conn.addVia(vst)

    # The first "tpDetails" row is skipped; the rest come in groups of three per service.
    lines = cont.findAll("tr", { "class": "tpDetails" })[1:]

    threads = []
    iq = queue.PriorityQueue()

    for line in range(0, len(lines), 3):
        t = threading.Thread(target=getService, args=(line, lines[line:line + 3], iq))
        t.start()
        threads.append(t)

    for t in threads:
        t.join()

    while not iq.empty():
        conn.addService(iq.get()[1])

    q.put((cid, conn))
def connRequest(frm, to, count = 3, time = datetime.datetime.now(), mode = False, details = False, via = []):
outdate = datetime.datetime.strftime(time,"%d.%m.%Y")
outtime = datetime.datetime.strftime(time,"%H:%M")
outdate = datetime.datetime.strftime(time, "%d.%m.%Y")
outtime = datetime.datetime.strftime(time, "%H:%M")
url = "http://fahrplan.oebb.at/bin/query.exe/dn?start=1&S=%s&Z=%s&REQ0JourneyDate=%s&time=%s&REQ0HafasNumCons0=%s%s" % (frm.extid if frm.extid else frm.name, to.extid if to.extid else to.name, outdate, outtime, count, "&timesel=arrive" if mode else "")
@ -26,72 +108,25 @@ def connRequest(frm, to, count = 3, time = datetime.datetime.now(), mode = False
juha = BeautifulSoup(source, "html5lib")
if details:
conns = []
for a in juha.findAll("a"):
if a.get("href") and "HWAI=CONNECTION$" in a.get("href"):
dpage = a.get("href")
conns += [a.get("href")]
ssource = requests.get(dpage).text
suppe = BeautifulSoup(ssource, "html5lib")
for i in range(0, count):
cont = suppe.find("tr", id="trC0-%i" % i)
if not cont:
break
threads = []
q = queue.PriorityQueue()
# buyurl = None
#
# for url in cont.findAll("a"):
# if url.get("href") and "https://tickets.oebb.at/de/ticket/ticket?" in url.get("href"):
# buyurl = url.get("href")
for i in range(len(conns[:-1])):
t = threading.Thread(target=getDetails, args=(i, conns[i], q, via))
t.start()
threads += [t]
conn = Connection(details)
for t in threads:
t.join()
for vst in via:
conn.addVia(vst)
lines = cont.findAll("tr", { "class": "tpDetails" })[1:]
cdate = None
for line in range(0, len(lines), 3):
serv = lines[line:line + 3]
dep = serv[0]
arr = serv[1]
det = serv[2]
depst = list(workers.val.validateName(dep.find("td", { "class": "station" }).findAll("a")[0].string))[0]
depdate = dep.find("td", { "class": "date" }).string.strip() or cdate
deptime = dep.find("td", { "class": "timeValue" }).find("span").string.split()[1].strip()
depprog = (dep.find("span", { "class": "prognosis" }).find("span") or dep.find("span", { "class": "prognosis" })).string.strip() or None
depplat = (dep.find("td", { "class": "platform" }).find("span") or dep.find("td", { "class": "platform" })).string.strip() or None
walk = dep.find("img", { "class": "product" }).get("src") == "/img/vs_oebb/fuss_pic.gif"
name = dep.find("img", { "class": "product" }).get("alt") if not walk else "Walk"
if not walk:
purl = dep.find("td", { "class": "product" }).find("a").get("href")
psource = requests.get(purl).text
zuppa = BeautifulSoup(psource, "html5lib")
dest = list(workers.val.validateName(zuppa.findAll("div", { "class": "block" })[2].text.split(":")[1].strip()))[0]
arrst = list(workers.val.validateName(arr.find("td", { "class": "station" }).findAll("a")[0].string))[0]
arrdate = (arr.find("td", { "class": "date" }).find("span") or arr.find("td", { "class": "date" })).string.strip() or depdate
arrtime = arr.find("td", { "class": "timeValue" }).find("span").string.split()[1].strip()
arrprog = (arr.find("span", { "class": "prognosis" }).find("span") or arr.find("span", { "class": "prognosis" })).string.strip() or None
arrplat = (arr.find("td", { "class": "platform" }).find("span") or arr.find("td", { "class": "platform" })).string.strip() or None
depts = datetime.datetime.strptime("%s %s" % (depdate, deptime), "%d.%m.%Y %H:%M")
arrts = datetime.datetime.strptime("%s %s" % (arrdate, arrtime), "%d.%m.%Y %H:%M")
depprog = deptime if depprog == "pünktlich" else depprog
arrprog = arrtime if arrprog == "pünktlich" else arrprog
cdate = arrdate
svc = Service(name, depst, depts, arrst, arrts, dest, depplat, depprog, arrplat, arrprog)
conn.addService(svc)
yield conn
while not q.empty():
yield q.get()[1]
else:
for i in range(0, count):