Use PhantomJS instead of Firefox, update URLs, login immediately upon launch

This commit is contained in:
Klaus-Uwe Mitterer 2017-07-19 16:11:13 +02:00
parent 90e997a500
commit 5e59a800b2

View file

@ -4,12 +4,11 @@ from bs4 import BeautifulSoup
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException from selenium.common.exceptions import NoSuchElementException
from xvfbwrapper import Xvfb
import multiprocessing, urllib.request, urllib.error, urllib.parse, time, os import multiprocessing, urllib.request, urllib.error, urllib.parse, time, os
import dbtools, phototools, setuptools import dbtools, phototools, setuptools
def status(driver):
    """Return True when the page currently loaded in ``driver`` looks like a
    logged-in page.

    The page counts as valid only if all of the following hold:
      * it does not contain the text "Unauthorised Access",
      * it does not reference "/main/login.php",
      * its source is longer than 100 characters.

    driver: an object exposing a ``page_source`` string attribute.
    Returns a bool.
    """
    # Read the page once: every access to page_source goes back to the
    # driver, so three separate reads could see three different snapshots.
    source = driver.page_source
    return (
        "Unauthorised Access" not in source
        and "/main/login.php" not in source
        and len(source) > 100
    )
@ -21,8 +20,8 @@ def loadPage(url, driver, period=5,init=False):
time.sleep(period) time.sleep(period)
def loginHandler(driver, user = setuptools.user(), password = setuptools.password()): def loginHandler(driver, user = setuptools.user(), password = setuptools.password()):
loadPage("https://www.planetromeo.com/",driver,3,True) loadPage("https://classic.planetromeo.com/",driver,3,True)
loadPage("https://www.planetromeo.com/main/login.php",driver,3,True) loadPage("https://classic.planetromeo.com/main/login.php",driver,3,True)
curfield = driver.find_element_by_name("username") curfield = driver.find_element_by_name("username")
curfield.send_keys(user) curfield.send_keys(user)
@ -47,7 +46,7 @@ def messageID(url):
return url.split("=")[-1] return url.split("=")[-1]
def quickshareHandler(driver, url, sender): def quickshareHandler(driver, url, sender):
nurl = "https://www.planetromeo.com/" + url if "planetromeo.com" not in url else url nurl = "https://classic.planetromeo.com/" + url if "planetromeo.com" not in url else url
loadPage(nurl) loadPage(nurl)
juha = BeautifulSoup(driver.page_source, "html5lib") juha = BeautifulSoup(driver.page_source, "html5lib")
@ -55,7 +54,7 @@ def quickshareHandler(driver, url, sender):
links = juha.findAll("a") links = juha.findAll("a")
for link in links: for link in links:
try: try:
purl = "https://www.planetromeo.com/" + link["data-pic"] if "planetromeo.com" not in link["data-pic"] else link["data-pic"] purl = "https://classic.planetromeo.com/" + link["data-pic"] if "planetromeo.com" not in link["data-pic"] else link["data-pic"]
phototools.processURL(purl, sender, shutup=True) phototools.processURL(purl, sender, shutup=True)
except: except:
pass pass
@ -64,9 +63,9 @@ def quickshareHandler(driver, url, sender):
def messageHandler(sender, recipient, mid, date, driver, mode = 0, db = dbtools.dbHelper()): def messageHandler(sender, recipient, mid, date, driver, mode = 0, db = dbtools.dbHelper()):
if mode == 0: if mode == 0:
loadPage("https://www.planetromeo.com/msg/?id=" + mid, driver) loadPage("https://classic.planetromeo.com/msg/?id=" + mid, driver)
else: else:
loadPage("https://www.planetromeo.com/msg/?type=sent&id=" + mid, driver) loadPage("https://classic.planetromeo.com/msg/?type=sent&id=" + mid, driver)
juha = BeautifulSoup(driver.page_source, "html5lib") juha = BeautifulSoup(driver.page_source, "html5lib")
text = juha.select("div.msg div")[0] text = juha.select("div.msg div")[0]
@ -120,20 +119,22 @@ def pageHandler(driver, db = dbtools.dbHelper()):
def siteHandler(driver, mode = 0, p = 0, db = dbtools.dbHelper()):
    """Walk the message overview pages, starting at page ``p``.

    Each overview page is loaded into ``driver`` and passed to
    ``pageHandler``; the next page is fetched for as long as ``pageHandler``
    returns a truthy value.

    driver: the browser driver used to load pages.
    mode:   0 requests the "view=all" overview, any other value "view=sent".
    p:      page number to start from (sent as the "seite" query parameter).
    db:     database helper forwarded to ``pageHandler``.
    """
    view = "all" if mode == 0 else "sent"
    base = "https://classic.planetromeo.com/mitglieder/messages/uebersicht.php?view=" + view + "&seite="

    # Iterate instead of recursing once per page, so a mailbox with very many
    # pages cannot exhaust the interpreter's recursion limit.
    while True:
        loadPage(base + str(p), driver)
        if not pageHandler(driver, db):
            break
        p += 1
def mainHandler(driver, db):
    """Log in with ``driver``, then run ``siteHandler`` for mode 0 followed by
    mode 1, passing ``db`` along to both runs.
    """
    loginHandler(driver)
    for mode in (0, 1):
        siteHandler(driver, mode, db=db)
if __name__ == "__main__":
    # Script entry point: open the database helper, start a headless
    # PhantomJS browser with a desktop Firefox user agent, and run the scrape.
    db = dbtools.dbHelper()
    caps = webdriver.DesiredCapabilities.PHANTOMJS.copy()
    # Page settings are only picked up under the "phantomjs.page.settings."
    # prefix; a misspelled prefix is silently ignored and the default
    # PhantomJS user agent would be sent instead.
    caps["phantomjs.page.settings.userAgent"] = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0"
    driver = webdriver.PhantomJS(desired_capabilities=caps)
    try:
        mainHandler(driver, db)
    finally:
        # quit() ends the session and the PhantomJS process even when the
        # run fails part-way, so no headless browser is left behind.
        driver.quit()