Commit be092826 authored by Klaus-Uwe Mitterer

Loads of changes. Filling the database seems to work now.

parent 59fe1a23
...@@ -63,13 +63,24 @@ class dbObject: ...@@ -63,13 +63,24 @@ class dbObject:
except: except:
return False return False
def getLatestMessage(db): def getLatestMessage(db, mode = 0, user = setuptools.user()):
db.executeQuery("SELECT max(id) FROM messages") if mode == 0:
db.executeQuery("SELECT max(id) FROM messages WHERE recipient_id='%s'" % user)
else:
db.executeQuery("SELECT max(id) FROM messages WHERE sender_id='%s'" % user)
try: try:
return int(db.getNext()[0]) return int(db.getNext()[0])
except: except:
return 0 return 0
def checkID(db, mid):
db.executeQuery("SELECT * FROM messages WHERE id=%s" % mid)
try:
db.getNext()[0]
return True
except:
return False
def dbHelper(): def dbHelper():
if setuptools.dbtype() == SQLITE: if setuptools.dbtype() == SQLITE:
return dbObject(dbtype=SQLITE, path=setuptools.dbpath()) return dbObject(dbtype=SQLITE, path=setuptools.dbpath())
......
#!/usr/bin/env python3 #!/usr/bin/env python3
from BeautifulSoup import BeautifulSoup from bs4 import BeautifulSoup
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException from selenium.common.exceptions import NoSuchElementException
import urllib.request, urllib.error, urllib.parse, time, os import urllib.request, urllib.error, urllib.parse, time, os
import setuptools import dbtools, setuptools
currentRun = []
def status(driver): def status(driver):
if "/main/login.php" not in driver.page_source: if "/main/login.php" not in driver.page_source:
...@@ -13,14 +15,15 @@ def status(driver): ...@@ -13,14 +15,15 @@ def status(driver):
else: else:
return False return False
def loadPage(url,period=5,init=False, driver=driver): def loadPage(url, driver, period=5,init=False):
if not (init or status(driver)): if not (init or status(driver)):
login() login(driver)
driver.get(url) driver.get(url)
time.sleep(period) time.sleep(period)
def loginHandler(user = setuptools.user, password = setuptools.password, driver = driver): def loginHandler(driver, user = setuptools.user(), password = setuptools.password()):
loadPage("http://www.planetromeo.com/",10,True,driver) loadPage("https://www.planetromeo.com/",driver,3,True)
loadPage("https://www.planetromeo.com/main/login.php",driver,3,True)
curfield = driver.find_element_by_name("username") curfield = driver.find_element_by_name("username")
curfield.send_keys(user) curfield.send_keys(user)
...@@ -29,59 +32,89 @@ def loginHandler(user = setuptools.user, password = setuptools.password, driver ...@@ -29,59 +32,89 @@ def loginHandler(user = setuptools.user, password = setuptools.password, driver
curfield.send_keys(password) curfield.send_keys(password)
curfield.send_keys(Keys.RETURN) curfield.send_keys(Keys.RETURN)
time.sleep(10) time.sleep(3)
return status() return status(driver)
class LoginError(Exception): class LoginError(Exception):
pass pass
def login(): def login(driver):
if not (status() or loginHandler()): if not (status(driver) or loginHandler(driver)):
raise LoginError("Login failed.") raise LoginError("Login failed.")
return True return True
def messageID(url): def messageID(url):
return url.split("=")[1] return url.split("=")[-1]
def messageHandler(sender, recipient, mid, date, driver, mode = 0, db = dbtools.dbHelper()):
global currentRun
if mode == 0:
loadPage("https://www.planetromeo.com/msg/?id=" + mid, driver)
else:
loadPage("https://www.planetromeo.com/msg/?type=sent&id=" + mid, driver)
juha = BeautifulSoup(driver.page_source, "html5lib")
text = juha.select("div.msg div")[0]
def messageHandler(mid, driver): db.executeQuery("INSERT INTO messages(id, text, sender_id, recipient_id, created_at) VALUES('%s', '%s', '%s', '%s', '%s');" % (mid, setuptools.unescapeText(text.string or "").strip(), sender, recipient, date))
loadPage("https://www.planetromeo.com/msg/?id=" + mid, driver=driver) db.commit()
try: try:
links = driver.find_elements_by_partial_link_text('pix/popup.php/') links = juha.findAll("a")
for link in links: for link in links:
phototools.processURL(link.get_attribute('href'), mid) if "/pix/popup.php/" in link["href"]:
except NoSuchElementException as e: phototools.processURL(link["href"], sender)
pass except:
pass
currentRun += [mid]
def pageHandler(driver): def pageHandler(driver, db = dbtools.dbHelper()):
webpage = driver.page_source global currentRun
links = BeautifulSoup(webpage).findAll('a')
count = 0 count = 0
juha = BeautifulSoup(driver.page_source, "html5lib")
for l in links: mode = 0
url = l['href'] if "sent" in driver.current_url:
if "/msg/?id=" in url: mode = 1
count += 1
mid = messageID(url) try:
if mid <= dbtools.getLatestMessage() for msg in juha.select("table.messageCenter tr")[1:]:
return False try:
messageHandler(mid, driver) data = msg.findAll('td')
user = data[1].string
mid = messageID(data[2].find("a")["href"])
date = data[3].string
if not db.checkID(mid):
if mode == 1:
messageHandler(user, setuptools.user(), mid, date, driver, mode, db)
else:
messageHandler(setuptools.user(), user, mid, date, driver, mode, db)
count += 1
except IndexError:
pass
except IndexError:
return False
if count == 0: if count == 0:
return False return False
return True return True
def siteHandler(p = 0, driver = driver): def siteHandler(driver, mode = 0, p = 0, db = dbtools.dbHelper()):
loadPage("https://www.planetromeo.com/mitglieder/messages/uebersicht.php?seite=" + str(p), driver=driver) if mode == 0:
if pageHandler(driver): loadPage("https://www.planetromeo.com/mitglieder/messages/uebersicht.php?view=all&seite=" + str(p), driver)
siteHandler(p+1, driver) else:
loadPage("https://www.planetromeo.com/mitglieder/messages/uebersicht.php?view=sent&seite=" + str(p), driver)
if pageHandler(driver, db):
siteHandler(driver, mode, p+1, db)
if __name__ == "__main__": if __name__ == "__main__":
db = dbtools.dbHelper()
driver = webdriver.Firefox() driver = webdriver.Firefox()
if login(driver): if loginHandler(driver):
siteHandler(driver=driver) siteHandler(driver, db=db)
siteHandler(driver, 1, db=db)
print("KTHXBAI") print("KTHXBAI")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment