Commit be092826 authored by Klaus-Uwe Mitterer

Loads of changes. Filling the database seems to work now.

parent 59fe1a23
......@@ -63,13 +63,24 @@ class dbObject:
except:
return False
def getLatestMessage(db, mode = 0, user = setuptools.user()):
    """Return the highest stored message id for ``user``, or 0 if none is found.

    db   -- object providing ``executeQuery(sql)`` and ``getNext()``.
    mode -- 0 filters on ``recipient_id``; any other value filters on
            ``sender_id``.
    user -- value compared against the chosen column; defaults to
            ``setuptools.user()`` as evaluated when this function is defined.

    Any failure to read or convert the first column of the next result row
    yields 0.
    """
    # NOTE(review): the statement is assembled with % formatting; if
    # executeQuery supports bound parameters, prefer those.
    if mode == 0:
        template = "SELECT max(id) FROM messages WHERE recipient_id='%s'"
    else:
        template = "SELECT max(id) FROM messages WHERE sender_id='%s'"
    db.executeQuery(template % user)
    try:
        return int(db.getNext()[0])
    except Exception:
        # Still best-effort, but no longer a bare ``except`` that would also
        # swallow KeyboardInterrupt / SystemExit.
        return 0
def checkID(db, mid):
    """Return True if a row with id ``mid`` can be fetched from ``messages``.

    db  -- object providing ``executeQuery(sql)`` and ``getNext()``.
    mid -- message id, interpolated unquoted into the statement.

    Returns False when the first column of the next result row cannot be
    read for any reason.
    """
    # NOTE(review): the statement is assembled with % formatting and ``mid``
    # is not quoted; if executeQuery supports bound parameters, prefer those.
    db.executeQuery("SELECT * FROM messages WHERE id=%s" % mid)
    try:
        db.getNext()[0]
    except Exception:
        # Still best-effort, but no longer a bare ``except`` that would also
        # swallow KeyboardInterrupt / SystemExit.
        return False
    return True
def dbHelper():
if setuptools.dbtype() == SQLITE:
return dbObject(dbtype=SQLITE, path=setuptools.dbpath())
......
#!/usr/bin/env python3
from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
import urllib.request, urllib.error, urllib.parse, time, os
import setuptools
import dbtools, setuptools
currentRun = []
def status(driver):
if "/main/login.php" not in driver.page_source:
......@@ -13,14 +15,15 @@ def status(driver):
else:
return False
def loadPage(url, driver, period=5,init=False):
    """Open ``url`` in ``driver`` and wait ``period`` seconds.

    url    -- address passed to ``driver.get``.
    driver -- browser object providing ``get``.
    period -- seconds to sleep after the page has been requested.
    init   -- when true, skip the login check (needed while the login
              pages themselves are being loaded).

    Unless ``init`` is set, ``login(driver)`` is called first whenever
    ``status(driver)`` is falsy.
    """
    if not (init or status(driver)):
        login(driver)
    driver.get(url)
    time.sleep(period)
def loginHandler(user = setuptools.user, password = setuptools.password, driver = driver):
loadPage("http://www.planetromeo.com/",10,True,driver)
def loginHandler(driver, user = setuptools.user(), password = setuptools.password()):
loadPage("https://www.planetromeo.com/",driver,3,True)
loadPage("https://www.planetromeo.com/main/login.php",driver,3,True)
curfield = driver.find_element_by_name("username")
curfield.send_keys(user)
......@@ -29,59 +32,89 @@ def loginHandler(user = setuptools.user, password = setuptools.password, driver
curfield.send_keys(password)
curfield.send_keys(Keys.RETURN)
time.sleep(10)
time.sleep(3)
return status()
return status(driver)
class LoginError(Exception):
    """Raised by ``login`` when the session could not be authenticated."""
def login(driver):
    """Make sure ``driver`` holds a logged-in session.

    ``loginHandler(driver)`` is only tried when ``status(driver)`` is falsy.

    Returns True on success.
    Raises LoginError when both calls report failure.
    """
    if not (status(driver) or loginHandler(driver)):
        raise LoginError("Login failed.")
    return True
def messageID(url):
    """Return the text after the last ``=`` in ``url``.

    Taking the last segment (rather than the second) keeps the id correct
    for URLs with several query parameters, e.g. ``?type=sent&id=42``.
    A URL without ``=`` is returned unchanged.
    """
    return url.split("=")[-1]
def messageHandler(sender, recipient, mid, date, driver, mode = 0, db = dbtools.dbHelper()):
    """Load one message page, store the message and hand photo links on.

    sender, recipient -- values written to ``sender_id`` / ``recipient_id``.
    mid    -- message id (string); appended to the message URL.
    date   -- value written to ``created_at``.
    driver -- browser object passed to ``loadPage``; its ``page_source`` is parsed.
    mode   -- 0 loads ``/msg/?id=``; anything else loads ``/msg/?type=sent&id=``.
    db     -- object providing ``executeQuery`` and ``commit``; the default is
              created once, when this function is defined.

    Raises IndexError when the page contains no ``div.msg div`` element.
    Appends ``mid`` to the module-level ``currentRun`` list when done.
    """
    global currentRun
    if mode == 0:
        loadPage("https://www.planetromeo.com/msg/?id=" + mid, driver)
    else:
        loadPage("https://www.planetromeo.com/msg/?type=sent&id=" + mid, driver)
    juha = BeautifulSoup(driver.page_source, "html5lib")
    text = juha.select("div.msg div")[0]
    # NOTE(review): the statement is assembled with % formatting and the
    # message text comes from a web page; if executeQuery supports bound
    # parameters, prefer those.
    db.executeQuery("INSERT INTO messages(id, text, sender_id, recipient_id, created_at) VALUES('%s', '%s', '%s', '%s', '%s');" % (mid, setuptools.unescapeText(text.string or "").strip(), sender, recipient, date))
    db.commit()

    import logging  # local import: the file's import block is not fully visible here
    log = logging.getLogger(__name__)
    for link in juha.findAll("a"):
        # Anchors without an href used to raise KeyError, which silently
        # ended the whole photo scan for this message.
        href = link.get("href") or ""
        if "/pix/popup.php/" not in href:
            continue
        try:
            phototools.processURL(href, sender)
        except Exception:
            # Photo download stays best-effort, but one bad link no longer
            # hides the rest and the failure is reported.
            # NOTE(review): no import of phototools is visible in this part
            # of the file - confirm it is in scope.
            log.exception("Could not process photo link %s of message %s", href, mid)
    currentRun += [mid]
# pageHandler: scan the message overview currently loaded in `driver` and pass
# every message whose id is not yet stored to messageHandler.
# Returns False when no message was counted or an IndexError escapes the row
# loop, True otherwise.
# NOTE(review): this span is flattened diff output. The first definition and the
# link-based loop further down appear to be the superseded pre-commit version;
# the definition taking `db` appears to be the current one - confirm against the
# repository before editing.
def pageHandler(driver):
webpage = driver.page_source
links = BeautifulSoup(webpage).findAll('a')
def pageHandler(driver, db = dbtools.dbHelper()):
global currentRun
count = 0
# Parse the page that the caller has already loaded into the browser.
juha = BeautifulSoup(driver.page_source, "html5lib")
# NOTE(review): presumably the superseded link-based scan (it uses `links`
# from the first definition and getLatestMessage without arguments).
for l in links:
url = l['href']
if "/msg/?id=" in url:
count += 1
mid = messageID(url)
if mid <= dbtools.getLatestMessage()
return False
messageHandler(mid, driver)
# mode 1 when the current URL contains "sent", otherwise mode 0.
mode = 0
if "sent" in driver.current_url:
mode = 1
try:
# Skip the first row of table.messageCenter (presumably the header row).
for msg in juha.select("table.messageCenter tr")[1:]:
try:
data = msg.findAll('td')
# Cells 1, 2 and 3 are read as user, message link and date.
user = data[1].string
mid = messageID(data[2].find("a")["href"])
date = data[3].string
# Only ids unknown to the database are fetched.
if not db.checkID(mid):
# NOTE(review): with mode 1 ("sent") the listed user is passed as sender
# and setuptools.user() as recipient, and the reverse for mode 0. This looks
# swapped compared with getLatestMessage, where mode 0 filters on
# recipient_id - confirm.
if mode == 1:
messageHandler(user, setuptools.user(), mid, date, driver, mode, db)
else:
messageHandler(setuptools.user(), user, mid, date, driver, mode, db)
# NOTE(review): indentation is lost, so it is unclear whether this counts
# every row or only newly stored messages - confirm.
count += 1
# A row with too few cells is skipped.
except IndexError:
pass
except IndexError:
return False
# A page on which nothing was counted returns False.
if count == 0:
return False
return True
def siteHandler(driver, mode = 0, p = 0, db = dbtools.dbHelper()):
    """Walk the message overview pages, starting at page ``p``.

    driver -- browser object passed to ``loadPage`` and ``pageHandler``.
    mode   -- 0 walks the ``view=all`` overview; anything else ``view=sent``.
    p      -- number of the first page to load.
    db     -- passed through to ``pageHandler``; the default is created
              once, when this function is defined.

    Pages are loaded one after another until ``pageHandler`` returns a
    falsy value.
    """
    # Iterate instead of recursing once per page: a long mailbox would
    # otherwise hit Python's recursion limit.
    while True:
        if mode == 0:
            loadPage("https://www.planetromeo.com/mitglieder/messages/uebersicht.php?view=all&seite=" + str(p), driver)
        else:
            loadPage("https://www.planetromeo.com/mitglieder/messages/uebersicht.php?view=sent&seite=" + str(p), driver)
        if not pageHandler(driver, db):
            break
        p += 1
if __name__ == "__main__":
    # Script entry point: log in, then walk the "all" overview (mode 0)
    # followed by the "sent" overview (mode 1), sharing one database object.
    db = dbtools.dbHelper()
    driver = webdriver.Firefox()
    try:
        if loginHandler(driver):
            siteHandler(driver, db=db)
            siteHandler(driver, 1, db=db)
    finally:
        # Close the browser even when scraping fails; it was never closed before.
        driver.quit()
    # NOTE(review): the flattened source does not show whether this farewell
    # was nested under the login check; it is printed after any run that
    # finishes without an exception - confirm.
    print("KTHXBAI")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment