Improve requests session creation and connection error handling

This commit is contained in:
Klaus-Uwe Mitterer 2017-09-12 20:47:02 +02:00
parent 3ed2669c9d
commit b280f6b469

View file

@ -8,6 +8,7 @@ import glob, multiprocessing, re, requests, urllib.request, urllib.error, urllib
import setuptools
useragent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0"
session = None
def status(driver):
if "Benutzername oder E-Mail-Adresse:" not in driver.page_source and 'href="login/"' not in driver.page_source:
@ -22,7 +23,7 @@ def loadPage(url, driver, period=5,init=False):
time.sleep(period)
def loginHandler(driver, user = setuptools.user(), password = setuptools.password()):
loadPage("https://scriptzbase.org/login/",driver,3,True)
loadPage("https://scriptzbase.org/login/", driver, 3, True)
curfield = driver.find_element_by_name("login")
curfield.send_keys(user)
@ -39,31 +40,47 @@ class LoginError(Exception):
pass
def login(driver):
global session
if not (status(driver) or loginHandler(driver)):
raise LoginError("Login failed.")
return True
def pageHandler(driver):
juha = BeautifulSoup(driver.page_source, "html5lib")
session = requests.Session()
kekse = driver.get_cookies()
session.headers.update({"User-Agent": useragent})
for keks in kekse:
session.cookies.set(keks["name"], keks["value"])
session.headers.update({"User-Agent": useragent})
return True
def fileHandler(url, retry = False):
    """Download one file through the shared session unless it is already on disk.

    url   -- site-relative download link; the text after the first "=" is
             used as the file id.
    retry -- True when one attempt has already failed, so the next
             connection error is re-raised instead of being retried.

    The file is stored as files/sbd<id>_<name>, where <name> is taken from
    the Content-Disposition response header. A connection error is retried
    once after a 10 second pause; a second one propagates to the caller.
    A response without that header raises KeyError.
    """
    import os  # local import: only needed to sanitise the server-supplied name

    fid = url.split("=")[1]

    # Files are saved as "sbd<id>_<name>", so the existence check needs the
    # "_*" wildcard; without it the pattern never matches and every file is
    # fetched again on every run. The "_" keeps id 12 from matching sbd123_...
    if glob.glob("files/sbd%s_*" % glob.escape(fid)):
        return

    # Only the request itself can raise ConnectionError, so only it is guarded.
    while True:
        try:
            res = session.get("https://scriptzbase.org/%s" % url)
            break
        except requests.exceptions.ConnectionError:
            if retry:
                raise
            retry = True
            time.sleep(10)

    fname = re.findall("filename=(.+)", res.headers["content-disposition"])[0].split(";")[0].strip('"')
    # The header is remote input: drop any directory part so the file cannot
    # be written outside the files/ directory.
    fname = os.path.basename(fname.replace("\\", "/"))

    with open("files/sbd%s_%s" % (fid, fname), "wb") as out:
        out.write(res.content)
def pageHandler(driver):
juha = BeautifulSoup(driver.page_source, "html5lib")
for a in juha.findAll("a"):
try:
if "/download?version=" in a["href"]:
fid = a["href"].split("=")[1]
if not glob.glob("files/sbd%s*" % fid):
res = session.get("https://scriptzbase.org/%s" % a["href"])
fname = re.findall("filename=(.+)", res.headers["content-disposition"])[0].split(";")[0].strip('"')
with open("files/sbd%s_%s" % (fid, fname), "wb") as out:
out.write(res.content)
fileHandler(a["href"])
except KeyError:
pass
@ -76,7 +93,6 @@ def siteHandler(driver, p = 1):
def mainHandler(driver):
os.makedirs("files", exist_ok=True)
loginHandler(driver)
siteHandler(driver)
if __name__ == "__main__":