Further preparation for MySQL support, which is not working yet, though...

Make filler handle direct messages
Add table and functions for direct messages
This commit is contained in:
Klaus-Uwe Mitterer 2016-06-30 12:33:09 +02:00
parent aaa683aa73
commit aad1531392
8 changed files with 67 additions and 28 deletions

View file

@ -4,29 +4,22 @@ import dbtools
import sqlite3, csv, sys
def makeDB(dbo=dbtools.dbObject(), infile='tweets.csv'):
def makeDB(dbo=dbtools.dbHelper(), infile='tweets.csv'):
try:
infile = open(infile)
except IOError:
raise IOError("Unable to read %s." % infile)
input = list(csv.reader(infile))
infile = list(csv.reader(infile))
try:
dbo.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));")
except:
raise IOError("%s already exists. Please delete it before trying to create a new one." % path)
for row in infile[1:]:
try:
dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');")
except:
pass
for row in input[1:]:
dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');")
dbo.commit()
if __name__ == "__main__":
if len(sys.argv) > 2:
raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.")
try:
makeDB(sys.argv[1])
except IndexError:
makeDB()
makeDB()

View file

@ -65,7 +65,18 @@ class dbObject:
mode = "MIN"
else:
mode = "MAX"
return setuptools.getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0]))
if self.dbtype == SQLITE:
return setuptools.getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0]))
else:
self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode)
return setuptools.getDate(str(self.getNext()["%s(SUBSTR(timestamp,0,11))" % mode]))
def getLatestMessage(db):
    """Return the highest ``id`` stored in the ``messages`` table.

    Runs ``SELECT max(id) FROM messages`` through ``db.executeQuery`` and
    converts the first column of the row returned by ``db.getNext()`` to an
    ``int``.

    Returns 0 when that value cannot be fetched or converted, for example
    when ``getNext()`` yields ``None``, the column is ``None``, or the row
    cannot be indexed with ``0``.
    """
    db.executeQuery("SELECT max(id) FROM messages")
    try:
        return int(db.getNext()[0])
    except (TypeError, ValueError, IndexError, KeyError):
        # Only "no usable value" falls back to 0; anything else (including
        # KeyboardInterrupt) is allowed to propagate instead of being hidden.
        return 0
def getLatestTweet(db):
db.executeQuery("SELECT max(tweet_id) FROM tweets")
@ -74,3 +85,10 @@ class dbObject:
except:
return 0
def dbHelper():
    """Create a ``dbObject`` for the backend selected in the configuration.

    The backend is read once from ``setuptools.dbtype()``:

    * ``SQLITE`` -- a ``dbObject`` opened on ``setuptools.dbpath()``.
    * ``MYSQL``  -- a ``dbObject`` built from ``setuptools.dbhost()``,
      ``dbuser()``, ``dbpass()`` and ``dbname()``.

    Raises ``setuptools.SetupException`` for any other value.
    """
    # Read the setting a single time so both comparisons see the same value.
    dbtype = setuptools.dbtype()

    if dbtype == SQLITE:
        return dbObject(dbtype=SQLITE, path=setuptools.dbpath())

    if dbtype == MYSQL:
        return dbObject(
            dbtype=MYSQL,
            host=setuptools.dbhost(),
            user=setuptools.dbuser(),
            pwd=setuptools.dbpass(),
            db=setuptools.dbname(),
        )

    raise setuptools.SetupException()

View file

@ -2,9 +2,9 @@
import dbtools, setuptools, twitools
def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools.twObject()):
def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twitools.twObject()):
query = "from:" + user
savepoint = db.getLatestTweet()
savepoint = db.getLatestTweet() + 1
last = savepoint
timeline = two.search(query, savepoint)
@ -15,7 +15,10 @@ def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools.
timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
text = setuptools.unescapeText(status.text)
db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
try:
db.executeQuery("INSERT INTO tweets(tweet_id,timestamp,text) VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
except:
pass
db.commit()
last = status.id
@ -25,7 +28,32 @@ def fill(db=dbtools.dbObject(), user=twitools.twObject().whoami(), two=twitools.
return tw_counter, last, savepoint
if __name__ == "__main__":
count, last, first = fill()
print("Stored %i tweets after %i until %i." % (count, first, last))
def getMessages(db=dbtools.dbHelper(), two=twitools.twObject()):
    """Fetch new direct messages and store them in the ``messages`` table.

    Received and sent direct messages are requested from ``two.api`` with
    ``since_id`` set to ``db.getLatestMessage() + 1``. Each one is inserted
    as ``(id, text, sender_id, recipient_id, created_at)``.

    Insertion is best-effort: a message whose INSERT fails is skipped and
    not counted. All successful inserts are committed together at the end.

    Returns a tuple ``(stored_count, savepoint, latest_id)`` where
    ``savepoint`` is the ``since_id`` used for the requests and ``latest_id``
    is ``db.getLatestMessage()`` after the commit.
    """
    savepoint = db.getLatestMessage() + 1
    # NOTE(review): if the API treats since_id as exclusive, a message whose
    # id is exactly latest + 1 would be skipped -- confirm against the API docs.
    request = dict(since_id=savepoint, count=200, full_text=True)
    received = two.api.direct_messages(**request)
    sent = two.api.sent_direct_messages(**request)

    mcount = 0
    for batch in (received, sent):
        for m in batch:
            try:
                # Message text is untrusted input. Double any single quote so
                # it cannot terminate the SQL string literal -- previously any
                # message containing "'" failed to insert and was dropped.
                text = setuptools.unescapeText(m.text).replace("'", "''")
                db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, text, m.sender_id, m.recipient_id, m.created_at))
                mcount += 1
            except Exception:
                # Best-effort: skip messages that cannot be stored, but let
                # KeyboardInterrupt/SystemExit through.
                pass

    db.commit()
    return mcount, savepoint, db.getLatestMessage()
if __name__ == "__main__":
    # getTweets() returns (count, last, savepoint).
    count, last, first = getTweets()
    print("Stored %i tweets after %i until %i." % (count, first, last))

    # getMessages() returns (count, savepoint, latest) -- the reverse order of
    # getTweets() -- so unpack accordingly to keep "after"/"until" correct.
    count, first, last = getMessages()
    print("Stored %i messages after %i until %i." % (count, first, last))

View file

@ -91,7 +91,7 @@ def getHeaders(strings, av):
return [headers]
def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbObject(), headers = False):
def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbHelper(), headers = False):
if fr == None:
fr = db.getFLDate()
if to == None:

View file

@ -3,7 +3,7 @@
import dbtools
import argparse, operator, re, sys
def getTweets(mode = "@", db = dbtools.dbObject()):
def getTweets(mode = "@", db = dbtools.dbHelper()):
handles = dict()
tweets = db.executeQuery("SELECT text FROM tweets")
@ -36,5 +36,5 @@ if __name__ == "__main__":
else:
mode = "@"
for handle, tweets in sorted(list(getTweets(mode,dbtools.dbObject()).items()), key=operator.itemgetter(1), reverse=True):
for handle, tweets in sorted(list(getTweets(mode=mode).items()), key=operator.itemgetter(1), reverse=True):
print(handle + "," + str(tweets))

View file

@ -39,7 +39,7 @@ def queryBuilder(fr, to):
return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to)
def getDataByDate(fr, to, db = dbtools.dbObject(), headers = True):
def getDataByDate(fr, to, db = dbtools.dbHelper(), headers = True):
if fr == None:
fr = db.getFLDate()
if to == None:

View file

@ -4,7 +4,7 @@ import dbtools
import sys
def makeDB(db=dbtools.dbObject()):
def makeDB(db=dbtools.dbHelper()):
db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));")
db.commit()

View file

@ -12,7 +12,7 @@ def getSetting(section, setting, path = "config.cfg"):
def dbtype():
try:
return getSetting("Database", "type")
return int(getSetting("Database", "type"))
except:
return 0 # for SQLite3