diff --git a/filler.py b/filler.py index ea58c22..3292fbd 100755 --- a/filler.py +++ b/filler.py @@ -1,51 +1,44 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import config, html.parser, sqlite3, time, tweepy, os +import tools -def fill(user=config.user,dbpath=config.dbpath): - search = "from:" + user - - conn = sqlite3.connect(dbpath) - cur = conn.cursor() +import html.parser, os +def getSavepoint(db): + db.executeQuery("SELECT max(tweet_id) FROM tweets") try: - cur.execute("SELECT max(tweet_id) FROM tweets") + return int(db.getNext()[0]) except: - raise IOError("Database file " + dbpath + " does not exist or is in the wrong format. Please create it before trying to populate it.") + print("No tweets stored yet.") + return 0 - try: - savepoint = int(cur.fetchone()[0]) - except: - savepoint = 0 +def unescapeText(text): + return html.parser.HTMLParser().unescape(text).replace("'","''") +def fill(dbpath=tools.config.dbpath, user=tools.config.user, two=tools.twObject()): + query = "from:" + user + + db = tools.dbHelper(dbpath) + + savepoint = getSavepoint(db) last = savepoint - auth = tweepy.OAuthHandler(config.cke, config.cse) - auth.set_access_token(config.ato, config.ase) - api = tweepy.API(auth) - - timelineIterator = list(tweepy.Cursor(api.search, q=search, since_id=savepoint).items()) - - timeline = [] - - for status in timelineIterator: - timeline.append(status) - - timeline.reverse() + timeline = two.search(query, savepoint) tw_counter = 0 for status in timeline: timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000" - text = html.parser.HTMLParser().unescape(status.text).replace("'", "''") + text = unescapeText(status.text) + + db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") + db.commit() - cur.execute("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')") last = status.id tw_counter = tw_counter + 1 - conn.commit() - conn.close() + db.closeConnection() return tw_counter, last, savepoint diff --git a/tools.py b/tools.py index a99f56d..21a29a6 100644 --- a/tools.py +++ b/tools.py @@ -1,12 +1,13 @@ import config -import os, sqlite3 +import os, sqlite3, tweepy class dbObject: def __init__(self, path=config.dbpath): self.conn = sqlite3.connect(path) self.cur = self.conn.cursor() + self.path = path def closeConnection(self): return self.conn.close() @@ -17,11 +18,8 @@ class dbObject: def executeQuery(self, query): return self.cur.execute(query) - def GetConnection(self): - return self.conn - - def GetCursor(self): - return self.cur + def getNext(self): + return self.cur.fetchone() def isInitialized(self): try: @@ -31,12 +29,25 @@ class dbObject: return False +class twObject: + + def __init__(self, cke = config.cke, cse = config.cse, ato = config.ato, ase = config.ase): + self.auth = tweepy.OAuthHandler(config.cke, config.cse) + self.auth.set_access_token(config.ato, config.ase) + self.api = tweepy.API(self.auth) + + def search(self, query, savepoint = 0): + tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items()) + tweets.reverse() + return tweets + + def dbCheck(db, create = False): - if (!create and dbInitialized(db)) or (create and !dbInitialized(db)): + if (not create and dbInitialized(db)) or (create and not dbInitialized(db)): return True if create: - raise ValueError("Provided database file " + path + " is already initialized. Remove it manually before trying to recreate it.") - raise ValueError("Provided database file " + path + " is not initialized. Create it using makedb.py or csvdb.py") + raise ValueError("Provided database file " + db.path + " is already initialized. Remove it manually before trying to recreate it.") + raise ValueError("Provided database file " + db.path + " is not initialized. Create it using makedb.py or csvdb.py") def dbHelper(path, create = False):