Use threading to improve response times for connection detail requests, set default connection count to 6 again as it doesn't matter anymore, update README.

This commit is contained in:
Klaus-Uwe Mitterer 2017-10-25 23:07:36 +02:00
parent 883cb9ee7c
commit d4dd2ba0e4
4 changed files with 121 additions and 87 deletions

View file

@ -67,7 +67,7 @@ The following parameters are required for this type of request:
The following optional parameters are accepted: The following optional parameters are accepted:
* "count": Number of connections to return (default: 1) * "count": Number of connections to return (default: 6)
* "date": Date on which to find a connection (e.g. 31.12.2017, default: today) * "date": Date on which to find a connection (e.g. 31.12.2017, default: today)
* "time": Time at which to find a connection (e.g. 20:00, default: current time) * "time": Time at which to find a connection (e.g. 20:00, default: current time)
* "mode": May be set to "arr" if "time" should be considered time of arrival * "mode": May be set to "arr" if "time" should be considered time of arrival
@ -88,17 +88,16 @@ By default, the response will not include some information, including at which
stations a change to a different service is required, if applicable. It will, stations a change to a different service is required, if applicable. It will,
however, return the total number of changes. If additional information is however, return the total number of changes. If additional information is
required, it may be requested by adding the "details" parameter to the request. required, it may be requested by adding the "details" parameter to the request.
Note that this request is going to take considerably longer to process. If at Note that this request is going to take about two to three times as long to
all possible, do not set "count" higher than 1 when requesting details as too process.
many connections may cause the query to time-out.
* [Example "conn" request without details] * [Example "conn" request without details]
(https://bahnapi.xyz/?type=conn&from=Strobl&to=Wien) (https://bahnapi.xyz/?type=conn&from=Koppl&to=Wien)
* [Same request with details] * [Same request with details]
(https://bahnapi.xyz/?type=conn&from=Strobl&to=Wien&details) (https://bahnapi.xyz/?type=conn&from=Koppl&to=Wien&details)
If "count" is not set, one connection will be returned. This value may safely be If "count" is not set, six connections will be returned, which is the maximum
increased up to 6 if "details" is not set. retrievable at once.
* [Example "conn" request for five connections] * [Example "conn" request for five connections]
(https://bahnapi.xyz/?type=conn&from=Thörl&to=Pama&count=5) (https://bahnapi.xyz/?type=conn&from=Thörl&to=Pama&count=5)

View file

@ -66,7 +66,7 @@ class Connection:
def xml(self, indent = 0, cid = False, frm = True, to = True, deptime = True, arrtime = True, duration = True, changes = True, services = True, via = True, servicekwargs = {}, stationkwargs = {}): def xml(self, indent = 0, cid = False, frm = True, to = True, deptime = True, arrtime = True, duration = True, changes = True, services = True, via = True, servicekwargs = {}, stationkwargs = {}):
out = " " * indent + "<connection%s>\n" % ("" if cid is False else " id=\"%i\"" % cid) out = " " * indent + "<connection%s>\n" % ("" if cid is False else " id=\"%i\"" % cid)
if frm: if frm and self.depst():
out += " " * indent + " <from>\n" out += " " * indent + " <from>\n"
out += self.depst().xml(indent + 2, **stationkwargs) + "\n" out += self.depst().xml(indent + 2, **stationkwargs) + "\n"
out += " " * indent + " </from>\n" out += " " * indent + " </from>\n"
@ -79,32 +79,32 @@ class Connection:
out += " " * indent + " </via>\n" out += " " * indent + " </via>\n"
if to: if to and self.arrst():
out += " " * indent + " <to>\n" out += " " * indent + " <to>\n"
out += self.arrst().xml(indent + 2, **stationkwargs) + "\n" out += self.arrst().xml(indent + 2, **stationkwargs) + "\n"
out += " " * indent + " </to>\n" out += " " * indent + " </to>\n"
if deptime or arrtime or duration or changes: if (deptime and self.deptime()) or (arrtime and self.arrtime()) or (duration and self.duration()) or (changes and self.changes()):
out += " " * indent + " <details>\n" out += " " * indent + " <details>\n"
if deptime: if deptime and self.deptime():
out += " " * indent + " <departure>\n" out += " " * indent + " <departure>\n"
out += " " * indent + " <date>%s</date>\n" % datetime.datetime.strftime(self.deptime(), "%d.%m.%Y") out += " " * indent + " <date>%s</date>\n" % datetime.datetime.strftime(self.deptime(), "%d.%m.%Y")
out += " " * indent + " <time>%s</time>\n" % datetime.datetime.strftime(self.deptime(), "%H:%M") out += " " * indent + " <time>%s</time>\n" % datetime.datetime.strftime(self.deptime(), "%H:%M")
out += " " * indent + " </departure>\n" out += " " * indent + " </departure>\n"
if arrtime: if arrtime and self.arrtime():
out += " " * indent + " <arrival>\n" out += " " * indent + " <arrival>\n"
out += " " * indent + " <date>%s</date>\n" % datetime.datetime.strftime(self.arrtime(), "%d.%m.%Y") out += " " * indent + " <date>%s</date>\n" % datetime.datetime.strftime(self.arrtime(), "%d.%m.%Y")
out += " " * indent + " <time>%s</time>\n" % datetime.datetime.strftime(self.arrtime(), "%H:%M") out += " " * indent + " <time>%s</time>\n" % datetime.datetime.strftime(self.arrtime(), "%H:%M")
out += " " * indent + " </arrival>\n" out += " " * indent + " </arrival>\n"
out += (" " * indent + " <duration>%s</duration>\n" % self.durationString()) if duration else "" out += (" " * indent + " <duration>%s</duration>\n" % self.durationString()) if duration and self.duration() else ""
out += (" " * indent + " <changes>%i</changes>\n" % self.changes()) if changes else "" out += (" " * indent + " <changes>%i</changes>\n" % self.changes()) if changes and self.changes() else ""
out += " " * indent + " </details>\n" out += " " * indent + " </details>\n"
if services: if services and self.services:
out += " " * indent + " <services>\n" out += " " * indent + " <services>\n"
for i in range(len(self.services)): for i in range(len(self.services)):
@ -121,7 +121,7 @@ class Connection:
out += (" " * indent + " \"@id\": %i,\n" % cid) if cid is not False else "" out += (" " * indent + " \"@id\": %i,\n" % cid) if cid is not False else ""
if frm: if frm and self.depst():
out += " " * indent + " \"from\":\n" out += " " * indent + " \"from\":\n"
out += self.depst().json(indent + 2, **stationkwargs) + ",\n" out += self.depst().json(indent + 2, **stationkwargs) + ",\n"
@ -134,35 +134,35 @@ class Connection:
out = "".join(out.rsplit(",", 1)) out = "".join(out.rsplit(",", 1))
out += " " * indent + " ],\n" out += " " * indent + " ],\n"
if to: if to and self.arrst():
out += " " * indent + " \"to\":\n" out += " " * indent + " \"to\":\n"
out += self.arrst().json(indent + 2, **stationkwargs) + ",\n" out += self.arrst().json(indent + 2, **stationkwargs) + ",\n"
if deptime or arrtime or duration or changes: if (deptime and self.deptime()) or (arrtime and self.arrtime()) or (duration and self.duration()) or (changes and self.changes()):
det = "" det = ""
det += " " * indent + " \"details\": {\n" det += " " * indent + " \"details\": {\n"
if deptime: if deptime and self.deptime():
det += " " * indent + " \"departure\": {\n" det += " " * indent + " \"departure\": {\n"
det += " " * indent + " \"date\": \"%s\",\n" % datetime.datetime.strftime(self.deptime(), "%d.%m.%Y") det += " " * indent + " \"date\": \"%s\",\n" % datetime.datetime.strftime(self.deptime(), "%d.%m.%Y")
det += " " * indent + " \"time\": \"%s\"\n" % datetime.datetime.strftime(self.deptime(), "%H:%M") det += " " * indent + " \"time\": \"%s\"\n" % datetime.datetime.strftime(self.deptime(), "%H:%M")
det += " " * indent + " },\n" det += " " * indent + " },\n"
if arrtime: if arrtime and self.arrtime():
det += " " * indent + " \"arrival\": {\n" det += " " * indent + " \"arrival\": {\n"
det += " " * indent + " \"date\": \"%s\",\n" % datetime.datetime.strftime(self.arrtime(), "%d.%m.%Y") det += " " * indent + " \"date\": \"%s\",\n" % datetime.datetime.strftime(self.arrtime(), "%d.%m.%Y")
det += " " * indent + " \"time\": \"%s\"\n"% datetime.datetime.strftime(self.arrtime(), "%H:%M") det += " " * indent + " \"time\": \"%s\"\n"% datetime.datetime.strftime(self.arrtime(), "%H:%M")
det += " " * indent + " },\n" det += " " * indent + " },\n"
det += (" " * indent + " \"duration\": \"%s\",\n" % self.durationString()) if duration else "" det += (" " * indent + " \"duration\": \"%s\",\n" % self.durationString()) if duration and self.duration() else ""
det += (" " * indent + " \"changes\": %i,\n" % self.changes()) if changes else "" det += (" " * indent + " \"changes\": %i,\n" % self.changes()) if changes and self.changes() else ""
det = "".join(det.rsplit(",", 1)) det = "".join(det.rsplit(",", 1))
out += det out += det
out += " " * indent + " },\n" out += " " * indent + " },\n"
if services: if services and self.services:
out += " " * indent + " \"services\": [\n" out += " " * indent + " \"services\": [\n"
for i in range(len(self.services)): for i in range(len(self.services)):

View file

@ -70,7 +70,7 @@ def application(env, re):
yield "\"from\" and \"to\" values are required for this type of request.".encode() yield "\"from\" and \"to\" values are required for this type of request.".encode()
return return
count = args["count"][0] if "count" in args and args["count"] else 1 count = args["count"][0] if "count" in args and args["count"] else 6
date = args["date"][0] if "date" in args and args["date"] else datetime.datetime.strftime(datetime.datetime.now(pytz.timezone("Europe/Vienna")),"%d.%m.%Y") date = args["date"][0] if "date" in args and args["date"] else datetime.datetime.strftime(datetime.datetime.now(pytz.timezone("Europe/Vienna")),"%d.%m.%Y")
time = args["time"][0] if "time" in args and args["time"] else datetime.datetime.strftime(datetime.datetime.now(pytz.timezone("Europe/Vienna")),"%H:%M") time = args["time"][0] if "time" in args and args["time"] else datetime.datetime.strftime(datetime.datetime.now(pytz.timezone("Europe/Vienna")),"%H:%M")
mode = True if "mode" in args and args["mode"] and args["mode"][0].lower() == "arr" else False mode = True if "mode" in args and args["mode"] and args["mode"][0].lower() == "arr" else False

View file

@ -2,6 +2,8 @@ from bs4 import BeautifulSoup
import requests import requests
import datetime import datetime
import pytz import pytz
import threading
import queue
import workers.val import workers.val
from classes import * from classes import *
@ -9,9 +11,89 @@ from classes import *
def getStation(name):
    """Return the first result that workers.val.validateName yields for *name*.

    All results are collected into a list first, so an empty result raises
    IndexError.
    """
    matches = list(workers.val.validateName(name))
    return matches[0]
def getService(sid, lines, q):
    """Parse one service from a group of detail rows and queue it under *sid*.

    sid   -- key stored alongside the result; the caller uses it for ordering.
    lines -- at least three parsed table rows: lines[0] is read as the
             departure row, lines[1] as the arrival row; lines[2] is looked
             up but not used any further.
    q     -- queue-like object that receives the tuple (sid, Service).

    Unless the service is a walk with a known departure date, the linked
    product page is downloaded to obtain the departure date and, for
    non-walk services, the final destination.
    """
    date_fmt = "%d.%m.%Y"
    stamp_fmt = "%d.%m.%Y %H:%M"

    def station_of(row):
        # First link in the station cell, resolved through the validator.
        label = row.find("td", { "class": "station" }).findAll("a")[0].string
        return list(workers.val.validateName(label))[0]

    def clock_of(row):
        # Second whitespace-separated token of the time cell's <span>.
        return row.find("td", { "class": "timeValue" }).find("span").string.split()[1].strip()

    def text_of(row, tag, css):
        # Text of the nested <span> when there is one, else of the element itself.
        cell = row.find(tag, { "class": css })
        return (cell.find("span") or cell).string.strip()

    dep, arr, _det = lines[0], lines[1], lines[2]

    depst = station_of(dep)
    depdate = dep.find("td", { "class": "date" }).string.strip() or None
    deptime = clock_of(dep)
    depprog = text_of(dep, "span", "prognosis") or None
    depplat = text_of(dep, "td", "platform") or None

    # A walk is recognised by its pictogram; it gets the fixed name "Walk".
    icon = dep.find("img", { "class": "product" })
    walk = icon.get("src") == "/img/vs_oebb/fuss_pic.gif"
    name = "Walk" if walk else icon.get("alt")

    arrst = station_of(arr)
    arrdate = text_of(arr, "td", "date") or depdate
    arrtime = clock_of(arr)
    arrprog = text_of(arr, "span", "prognosis") or None
    arrplat = text_of(arr, "td", "platform") or None

    if arrdate and not depdate:
        # Only the arrival date is given: the departure is taken to be the
        # day before.
        previous_day = datetime.datetime.strptime(arrdate, date_fmt) - datetime.timedelta(days=1)
        depdate = previous_day.strftime(date_fmt)

    if not walk or not depdate:
        purl = dep.find("td", { "class": "product" }).find("a").get("href")
        zuppa = BeautifulSoup(requests.get(purl).text, "html5lib")
        # NOTE(review): relies on the 2nd "block" div holding the date and the
        # 3rd one holding "<label>: <destination>" -- confirm against the page.
        depdate = zuppa.findAll("div", { "class": "block" })[1].text.strip()
        arrdate = arrdate or depdate

    if walk:
        dest = None
    else:
        dest_label = zuppa.findAll("div", { "class": "block" })[2].text.split(":")[1].strip()
        dest = list(workers.val.validateName(dest_label))[0]

    depts = datetime.datetime.strptime("%s %s" % (depdate, deptime), stamp_fmt)
    arrts = datetime.datetime.strptime("%s %s" % (arrdate, arrtime), stamp_fmt)

    # "pünktlich" in the prognosis is replaced by the scheduled time itself.
    if depprog == "pünktlich":
        depprog = deptime
    if arrprog == "pünktlich":
        arrprog = arrtime

    q.put((sid, Service(name, depst, depts, arrst, arrts, dest, depplat, depprog, arrplat, arrprog)))
def getDetails(cid, url, q, via = None):
    """Fetch the detail page for connection *cid* and queue the parsed result.

    cid -- index of the connection; selects the row with id "trC0-<cid>" and
           is stored alongside the result so the caller can order by it.
    url -- address of the detail page to download.
    q   -- queue-like object that receives the tuple (cid, Connection).
    via -- optional iterable of via stations added to the connection
           (default: none).

    Nothing is queued when the page has no row for *cid*. Each group of
    three detail rows is parsed by getService in its own thread; the
    services are added to the connection in row order once all threads
    have finished.
    """
    ssource = requests.get(url).text
    suppe = BeautifulSoup(ssource, "html5lib")
    cont = suppe.find("tr", id="trC0-%i" % cid)

    if not cont:
        return

    # NOTE: extraction of the ticket purchase link (anchors whose href contains
    # "https://tickets.oebb.at/de/ticket/ticket?") is currently disabled.

    conn = Connection(True)

    # None stands in for "no via stations" so that no list object is shared
    # between calls through the default argument.
    for vst in via or ():
        conn.addVia(vst)

    # The first "tpDetails" row is skipped; the rest come in groups of three.
    lines = cont.findAll("tr", { "class": "tpDetails" })[1:]

    threads = []
    # Entries are (row offset, Service), so draining the queue restores the
    # order of the rows regardless of which thread finished first.
    iq = queue.PriorityQueue()

    for line in range(0, len(lines), 3):
        t = threading.Thread(target=getService, args=(line, lines[line:line + 3], iq))
        t.start()
        threads.append(t)

    for t in threads:
        t.join()

    while not iq.empty():
        conn.addService(iq.get()[1])

    q.put((cid, conn))
def connRequest(frm, to, count = 3, time = datetime.datetime.now(), mode = False, details = False, via = []): def connRequest(frm, to, count = 3, time = datetime.datetime.now(), mode = False, details = False, via = []):
outdate = datetime.datetime.strftime(time,"%d.%m.%Y") outdate = datetime.datetime.strftime(time, "%d.%m.%Y")
outtime = datetime.datetime.strftime(time,"%H:%M") outtime = datetime.datetime.strftime(time, "%H:%M")
url = "http://fahrplan.oebb.at/bin/query.exe/dn?start=1&S=%s&Z=%s&REQ0JourneyDate=%s&time=%s&REQ0HafasNumCons0=%s%s" % (frm.extid if frm.extid else frm.name, to.extid if to.extid else to.name, outdate, outtime, count, "&timesel=arrive" if mode else "") url = "http://fahrplan.oebb.at/bin/query.exe/dn?start=1&S=%s&Z=%s&REQ0JourneyDate=%s&time=%s&REQ0HafasNumCons0=%s%s" % (frm.extid if frm.extid else frm.name, to.extid if to.extid else to.name, outdate, outtime, count, "&timesel=arrive" if mode else "")
@ -26,72 +108,25 @@ def connRequest(frm, to, count = 3, time = datetime.datetime.now(), mode = False
juha = BeautifulSoup(source, "html5lib") juha = BeautifulSoup(source, "html5lib")
if details: if details:
conns = []
for a in juha.findAll("a"): for a in juha.findAll("a"):
if a.get("href") and "HWAI=CONNECTION$" in a.get("href"): if a.get("href") and "HWAI=CONNECTION$" in a.get("href"):
dpage = a.get("href") conns += [a.get("href")]
ssource = requests.get(dpage).text threads = []
suppe = BeautifulSoup(ssource, "html5lib") q = queue.PriorityQueue()
for i in range(0, count):
cont = suppe.find("tr", id="trC0-%i" % i)
if not cont:
break
# buyurl = None for i in range(len(conns[:-1])):
# t = threading.Thread(target=getDetails, args=(i, conns[i], q, via))
# for url in cont.findAll("a"): t.start()
# if url.get("href") and "https://tickets.oebb.at/de/ticket/ticket?" in url.get("href"): threads += [t]
# buyurl = url.get("href")
conn = Connection(details) for t in threads:
t.join()
for vst in via: while not q.empty():
conn.addVia(vst) yield q.get()[1]
lines = cont.findAll("tr", { "class": "tpDetails" })[1:]
cdate = None
for line in range(0, len(lines), 3):
serv = lines[line:line + 3]
dep = serv[0]
arr = serv[1]
det = serv[2]
depst = list(workers.val.validateName(dep.find("td", { "class": "station" }).findAll("a")[0].string))[0]
depdate = dep.find("td", { "class": "date" }).string.strip() or cdate
deptime = dep.find("td", { "class": "timeValue" }).find("span").string.split()[1].strip()
depprog = (dep.find("span", { "class": "prognosis" }).find("span") or dep.find("span", { "class": "prognosis" })).string.strip() or None
depplat = (dep.find("td", { "class": "platform" }).find("span") or dep.find("td", { "class": "platform" })).string.strip() or None
walk = dep.find("img", { "class": "product" }).get("src") == "/img/vs_oebb/fuss_pic.gif"
name = dep.find("img", { "class": "product" }).get("alt") if not walk else "Walk"
if not walk:
purl = dep.find("td", { "class": "product" }).find("a").get("href")
psource = requests.get(purl).text
zuppa = BeautifulSoup(psource, "html5lib")
dest = list(workers.val.validateName(zuppa.findAll("div", { "class": "block" })[2].text.split(":")[1].strip()))[0]
arrst = list(workers.val.validateName(arr.find("td", { "class": "station" }).findAll("a")[0].string))[0]
arrdate = (arr.find("td", { "class": "date" }).find("span") or arr.find("td", { "class": "date" })).string.strip() or depdate
arrtime = arr.find("td", { "class": "timeValue" }).find("span").string.split()[1].strip()
arrprog = (arr.find("span", { "class": "prognosis" }).find("span") or arr.find("span", { "class": "prognosis" })).string.strip() or None
arrplat = (arr.find("td", { "class": "platform" }).find("span") or arr.find("td", { "class": "platform" })).string.strip() or None
depts = datetime.datetime.strptime("%s %s" % (depdate, deptime), "%d.%m.%Y %H:%M")
arrts = datetime.datetime.strptime("%s %s" % (arrdate, arrtime), "%d.%m.%Y %H:%M")
depprog = deptime if depprog == "pünktlich" else depprog
arrprog = arrtime if arrprog == "pünktlich" else arrprog
cdate = arrdate
svc = Service(name, depst, depts, arrst, arrts, dest, depplat, depprog, arrplat, arrprog)
conn.addService(svc)
yield conn
else: else:
for i in range(0, count): for i in range(0, count):