Merge branch 'master' of git.klaus-uwe.me:kumitterer/twitools

This commit is contained in:
Klaus-Uwe Mitterer 2016-12-06 22:30:18 +01:00
commit c1bb677d40
11 changed files with 322 additions and 185 deletions

View file

@ -1,35 +1,25 @@
#!/usr/bin/env python3
import tools
import dbtools
import sqlite3, csv, sys
def makeDB(path=tools.dbpath()):
def makeDB(dbo=dbtools.dbHelper(), infile='tweets.csv'):
try:
infile = open('tweets.csv')
infile = open(infile)
except IOError:
raise IOError("Please make sure that the tweets.csv from the Twitter download is located in this directory.")
raise IOError("Unable to read %s." % infile)
input = list(csv.reader(infile))
infile = list(csv.reader(infile))
conn = sqlite3.connect(path)
cur = conn.cursor()
try:
cur.execute("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));")
except sqlite3.OperationalError:
raise IOError("%s already exists. Please delete it before trying to create a new one." % path)
for row in infile[1:]:
try:
dbo.executeQuery("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');")
except:
pass
for row in input[1:]:
cur.execute("INSERT INTO tweets VALUES(" + row[0].replace("'","''") + ",'" + row[1].replace("'","''") + "','" + row[2].replace("'","''") + "','" + row[3].replace("'","''") + "','" + row[4].replace("'","''") + "','" + row[5].replace("'","''") + "','" + row[6].replace("'","''") + "','" + row[7].replace("'","''") + "','" + row[8].replace("'","''") + "','" + row[9].replace("'","''") + "');")
conn.commit()
dbo.commit()
if __name__ == "__main__":
if len(sys.argv) > 2:
raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.")
try:
makeDB(sys.argv[1])
except IndexError:
makeDB()
makeDB()

108
dbtools/__init__.py Normal file
View file

@ -0,0 +1,108 @@
import setuptools
import sqlite3, pymysql, pymysql.cursors
# Backend selectors accepted by dbObject(dbtype=...).
SQLITE = 0
MYSQL = 1
MARIADB = MYSQL  # MariaDB goes through the same pymysql code path as MySQL

# Selectors for dbObject.getFLDate(): earliest (MIN) or latest (MAX) date.
MIN = 0
MAX = 1


class dbObject:
    """Small wrapper around one database connection and one cursor.

    Supports SQLite (via ``sqlite3``) and MySQL/MariaDB (via ``pymysql``).
    The SQLite cursor returns tuple rows while the MySQL connection is opened
    with ``DictCursor`` and returns dict rows; helpers in this class that read
    a single column go through ``_firstColumn`` so they work with both.
    """

    # --------------------------------------------- Initialization -------------------------------------------------

    def initMySQL(self, host, port, user, pwd, db):
        """Open a MySQL/MariaDB connection (utf8mb4, dict rows) and remember its settings."""
        self.conn = pymysql.connect(host = host, port = port, user = user, password = pwd, db = db, charset = "utf8mb4", cursorclass = pymysql.cursors.DictCursor)
        self.cur = self.conn.cursor()
        self.dbtype = MYSQL
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = db

    def initSQLite(self, path):
        """Open the SQLite database file at ``path`` and remember the path."""
        self.conn = sqlite3.connect(path)
        self.cur = self.conn.cursor()
        self.dbtype = SQLITE
        self.path = path

    def __init__(self, dbtype = SQLITE, path = None, host = None, port = None, user = None, pwd = None, db = None):
        """Connect to the requested backend.

        ``path`` is only used for SQLite (default ``'Database.db'``); ``host``
        (default ``'localhost'``), ``port`` (default 3306), ``user``, ``pwd``
        and ``db`` are only used for MySQL.

        Raises:
            ValueError: if ``dbtype`` is neither SQLITE nor MYSQL.
        """
        if dbtype == SQLITE:
            self.initSQLite(path or 'Database.db')
        elif dbtype == MYSQL:
            self.initMySQL(host or 'localhost', port or 3306, user, pwd, db)
        else:
            raise ValueError("Unknown database type %s." % str(dbtype))

    # ---------------------------------------------- No more initialization ----------------------------------------

    @staticmethod
    def _firstColumn(row):
        """Return the first column of a fetched row, or None if there is no row.

        Accepts both tuple rows (sqlite3) and dict rows (pymysql DictCursor).
        """
        if row is None:
            return None
        if isinstance(row, dict):
            return next(iter(row.values()), None)
        return row[0]

    def closeConnection(self):
        """Close the underlying connection."""
        return self.conn.close()

    def commit(self):
        """Commit the current transaction."""
        return self.conn.commit()

    def executeQuery(self, query):
        """Execute ``query`` on the shared cursor and return the driver's result."""
        return self.cur.execute(query)

    def getAll(self):
        """Fetch all remaining rows of the last query."""
        return self.cur.fetchall()

    def getNext(self):
        """Fetch the next row of the last query (None when exhausted)."""
        return self.cur.fetchone()

    def isInitialized(self):
        """Return True if the ``tweets`` table can be queried, else False."""
        try:
            # Probe with a single row instead of selecting the whole table.
            self.executeQuery("SELECT 1 FROM tweets LIMIT 1")
            return True
        except Exception:
            return False

    def getFLDate(self, val = MIN):
        """Return the earliest (``MIN``) or latest (any other value) tweet date.

        The date is the first 10 characters of the ``timestamp`` column and is
        converted with ``setuptools.getDate``.
        """
        mode = "MIN" if val == MIN else "MAX"
        # SUBSTR positions are 1-based. The previous (0, 11) form happened to
        # yield 10 characters on SQLite but an empty string on MySQL; (1, 10)
        # yields the same 10 characters on both backends.
        self.executeQuery("SELECT %s(SUBSTR(timestamp,1,10)) AS fldate FROM tweets" % mode)
        return setuptools.getDate(str(self._firstColumn(self.getNext())))

    # NOTE: the methods below name their instance parameter ``db`` instead of
    # ``self``; the name is kept so keyword-style calls keep working.

    def getFollowers(db):
        """Yield the ``id`` of every ``followers`` row whose ``until`` is 0."""
        db.executeQuery("SELECT id FROM followers WHERE `until` = 0;")
        for row in db.getAll():
            yield db._firstColumn(row)

    def getFollowing(db):
        """Yield the ``id`` of every ``following`` row whose ``until`` is 0."""
        db.executeQuery("SELECT id FROM following WHERE `until` = 0;")
        for row in db.getAll():
            yield db._firstColumn(row)

    def getLatestMessage(db):
        """Return the highest ``messages.id`` as int, or 0 if there is none."""
        db.executeQuery("SELECT max(id) FROM messages")
        try:
            return int(db._firstColumn(db.getNext()))
        except (TypeError, ValueError):
            # max() over an empty table is NULL/None (or the value is not numeric).
            return 0

    def getLatestTweet(db):
        """Return the highest ``tweets.tweet_id`` as int, or 0 if there is none."""
        db.executeQuery("SELECT max(tweet_id) FROM tweets")
        try:
            return int(db._firstColumn(db.getNext()))
        except (TypeError, ValueError):
            # max() over an empty table is NULL/None (or the value is not numeric).
            return 0
def dbHelper():
    """Create a dbObject for the backend selected in the configuration.

    Uses ``setuptools.dbtype()`` to pick the backend: SQLite connects to
    ``setuptools.dbpath()``, MySQL connects with the configured host, user,
    password and database name.

    Raises:
        setuptools.SetupException: if the configured type is neither SQLITE
            nor MYSQL.
    """
    if setuptools.dbtype() == SQLITE:
        return dbObject(dbtype=SQLITE, path=setuptools.dbpath())

    # Anything that is not one of the two known backends is a broken setup.
    if setuptools.dbtype() != MYSQL:
        raise setuptools.SetupException()

    credentials = dict(
        host=setuptools.dbhost(),
        user=setuptools.dbuser(),
        pwd=setuptools.dbpass(),
        db=setuptools.dbname(),
    )
    return dbObject(dbtype=MYSQL, **credentials)

View file

@ -1,10 +1,9 @@
#!/usr/bin/env python3
import tools
import twitools
import tkinter, tkinter.messagebox, html.parser, os
two = tools.twObject()
two = twitools.twObject()
top = tkinter.Tk()
top.title("Tweet Deleter")
scrollbar = tkinter.Scrollbar(top)
@ -41,7 +40,7 @@ def addStatus(id, text):
list.insert(0, element.encode("UTF-8"))
def getTweets():
query = "from:" + tools.user()
query = "from:" + twitools.twObject().whoami()
try:
timeline = two.search(query, 0)

121
filler.py
View file

@ -1,26 +1,10 @@
#!/usr/bin/env python3
import tools
import argparse, dbtools, setuptools, time, twitools
import html.parser, os
def getSavepoint(db):
db.executeQuery("SELECT max(tweet_id) FROM tweets")
try:
return int(db.getNext()[0])
except:
print("No tweets stored yet.")
return 0
def unescapeText(text):
return html.parser.HTMLParser().unescape(text).replace("'","''")
def fill(dbpath=tools.dbpath(), user=tools.user(), two=tools.twObject()):
def getTweets(db=dbtools.dbHelper(), user=twitools.twObject().whoami(), two=twitools.twObject()):
query = "from:" + user
db = tools.dbHelper(dbpath)
savepoint = getSavepoint(db)
savepoint = db.getLatestTweet() + 1
last = savepoint
timeline = two.search(query, savepoint)
@ -29,18 +13,105 @@ def fill(dbpath=tools.dbpath(), user=tools.user(), two=tools.twObject()):
for status in timeline:
timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
text = unescapeText(status.text)
text = setuptools.unescapeText(status.text)
db.executeQuery("INSERT INTO tweets('tweet_id','timestamp','text') VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
try:
db.executeQuery("INSERT INTO tweets(tweet_id,timestamp,text) VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
except:
pass
db.commit()
last = status.id
tw_counter = tw_counter + 1
db.closeConnection()
return tw_counter, last, savepoint
def getMessages(db=None, two=None):
    """Store new received and sent direct messages in the ``messages`` table.

    Args:
        db: database helper; created with ``dbtools.dbHelper()`` when omitted.
        two: Twitter helper; created with ``twitools.twObject()`` when omitted.

    Returns:
        A tuple ``(stored, savepoint, latest)``: the number of rows inserted,
        the ``since_id`` that was used, and the highest message id in the
        database afterwards.
    """
    # Resolve defaults at call time. As default-argument expressions they ran
    # once at import, opening a connection even when the caller passed its own.
    if db is None:
        db = dbtools.dbHelper()
    if two is None:
        two = twitools.twObject()

    mcount = 0
    savepoint = db.getLatestMessage() + 1

    new_messages = two.api.direct_messages(since_id=savepoint, count=200, full_text=True)
    new_out_messages = two.api.sent_direct_messages(since_id=savepoint, count=200, full_text=True)

    for batch in (new_messages, new_out_messages):
        for m in batch:
            try:
                db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at))
            except Exception:
                # Best effort: skip rows the database rejects (presumably
                # messages that are already stored) and keep going.
                continue
            mcount += 1

    db.commit()

    return mcount, savepoint or 0, db.getLatestMessage()
def getFollowers(db=None, two=None, firstrun=False):
    """Synchronise the ``followers`` table with the current follower list.

    New followers are inserted with the current Unix time and ``until`` 0;
    followers that disappeared get their ``until`` set to the current time.

    Args:
        db: database helper; created with ``dbtools.dbHelper()`` when omitted.
        two: Twitter helper; created with ``twitools.twObject()`` when omitted.
        firstrun: when True, an empty stored or fetched list is accepted
            instead of being treated as an error.

    Returns:
        A tuple ``(gained, lost)``; ``(0, 0)`` if a list was unexpectedly empty.
    """
    # Resolve defaults at call time instead of at import time.
    if db is None:
        db = dbtools.dbHelper()
    if two is None:
        two = twitools.twObject()

    current = list(db.getFollowers())
    # Use the client that was passed in; previously ``two`` was ignored.
    new = list(twitools.getNamesByIDs(twitools.getFollowerIDs(two), two))

    gained = 0
    lost = 0

    if not firstrun and (not current or not new):
        print("Something went wrong.")
        return 0,0

    # Sets give constant-time membership tests; the lists keep the original order.
    known = set(current)
    present = set(new)

    for follower in new:
        if follower not in known:
            db.executeQuery("INSERT INTO followers VALUES('%s', %i, 0)" % (follower, int(time.time())))
            print("New follower: %s" % follower)
            gained += 1

    for follower in current:
        if follower not in present:
            db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), follower))
            print("Lost follower: %s" % follower)
            lost += 1

    db.commit()

    return gained, lost
def getFollowing(db=None, two=None, firstrun=False):
    """Synchronise the ``following`` table with the accounts currently followed.

    Newly followed accounts are inserted with the current Unix time and
    ``until`` 0; accounts no longer followed get their ``until`` set to the
    current time.

    Args:
        db: database helper; created with ``dbtools.dbHelper()`` when omitted.
        two: Twitter helper; created with ``twitools.twObject()`` when omitted.
        firstrun: when True, an empty stored or fetched list is accepted
            instead of being treated as an error.

    Returns:
        A tuple ``(gained, lost)``; ``(0, 0)`` if a list was unexpectedly empty.
    """
    # Resolve defaults at call time instead of at import time.
    if db is None:
        db = dbtools.dbHelper()
    if two is None:
        two = twitools.twObject()

    current = list(db.getFollowing())
    # Use the client that was passed in; previously ``two`` was ignored.
    new = list(twitools.getNamesByIDs(twitools.getFollowingIDs(two), two))

    gained = 0
    lost = 0

    if not firstrun and (not current or not new):
        print("Something went wrong.")
        return 0,0

    # Sets give constant-time membership tests; the lists keep the original order.
    known = set(current)
    present = set(new)

    for following in new:
        if following not in known:
            db.executeQuery("INSERT INTO following VALUES('%s', %i, 0)" % (following, int(time.time())))
            print("You started following: %s" % following)
            gained += 1

    for following in current:
        if following not in present:
            db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), following))
            print("You no longer follow: %s" % following)
            lost += 1

    db.commit()

    return gained, lost
if __name__ == "__main__":
count, last, first = fill()
print("Stored %i tweets after %i until %i." % (count, first, last))
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--first", help="first run: ignore empty databases", action="store_true")
args = parser.parse_args()
db = dbtools.dbHelper()
count, last, first = getTweets(db)
print("Stored %i tweets." % count)
count, last, first = getMessages(db)
print("Stored %i messages." % count)
gained, lost = getFollowers(db, firstrun=args.first)
print("Gained %i followers, lost %i." % (gained, lost))
gained, lost = getFollowing(db, firstrun=args.first)
print("Started following %i, stopped following %i." % (gained, lost))

View file

@ -1,30 +0,0 @@
#!/usr/bin/env python3
import tools
import os, time, tweepy
def getFollowerIDs(two=tools.twObject()):
''' Returns 5,000 follower IDs at most '''
return two.api.followers_ids(screen_name=tools.user())
def getNamesByIDs(fids=getFollowerIDs(), two=tools.twObject()):
for page in tools.paginate(fids, 100):
followers = two.api.lookup_users(user_ids=page)
for follower in followers:
yield follower.screen_name
def getOutDir(dirname="followers"):
if not os.path.isdir(dirname):
os.mkdir(dirname)
def getOutFile(dirname="followers"):
getOutDir(dirname)
return os.path.join(dirname, str(int(time.time())) + ".txt")
def writeOutFile(outfile=getOutFile()):
with open(getOutFile(), 'a') as f:
for follower in getNamesByIDs(getFollowerIDs()):
f.write(follower + "\n")
if __name__ == "__main__":
writeOutFile()

View file

@ -1,7 +1,6 @@
#!/usr/bin/env python3
import tools
import dbtools, setuptools
import sys, datetime
def dateArgs(argv = sys.argv[1:]):
@ -30,10 +29,10 @@ def dateArgs(argv = sys.argv[1:]):
raise ValueError("Number of days for running average must be an integer.")
mode = 0
elif mode == 1:
fr = tools.getDate(arg)
fr = setuptools.getDate(arg)
mode = 0
else:
to = tools.getDate(arg)
to = setuptools.getDate(arg)
mode = 0
if mode in (1, 2):
@ -92,9 +91,7 @@ def getHeaders(strings, av):
return [headers]
def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.dbpath(), headers = False):
db = tools.dbHelper(path)
def getTweetsByDate(strings = [], fr = None, to = None, av = 0, db = dbtools.dbHelper(), headers = False):
if fr == None:
fr = db.getFLDate()
if to == None:
@ -111,4 +108,4 @@ def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.dbp
return cur
if __name__ == "__main__":
tools.printCSV(getTweetsByDate(*dateArgs(), headers = True))
setuptools.printCSV(getTweetsByDate(*dateArgs(), headers = True))

View file

@ -1,12 +1,9 @@
#!/usr/bin/env python3
import tools
import dbtools
import argparse, operator, re, sys
import operator, re, sys
def getTweets(mode = "@", path = tools.dbpath()):
db = tools.dbHelper(path)
def getTweets(mode = "@", db = dbtools.dbHelper()):
handles = dict()
tweets = db.executeQuery("SELECT text FROM tweets")
@ -26,20 +23,18 @@ def getTweets(mode = "@", path = tools.dbpath()):
return handles
if __name__ == "__main__":
mode = "@"
path = tools.dbpath()
parser = argparse.ArgumentParser()
g = parser.add_mutually_exclusive_group()
g.add_argument("-t", "--hashtags", help="count only #hashtags", action="store_true")
g.add_argument("-w", "--words", help="count all words", action="store_true")
g.add_argument("-m", "--mentions", help="count only @mentions (default)", action="store_true")
args = parser.parse_args()
if args.hashtags:
mode = "#"
elif args.words:
mode = ""
else:
mode = "@"
if len(sys.argv) > 1:
if len(sys.argv) > 3 or (len(sys.argv) == 3 and "-h" not in sys.argv):
raise ValueError("Invalid arguments passed.")
for arg in sys.argv[1:]:
if arg == "-h":
mode = "#"
if arg == "-w":
mode = ""
else:
path = arg
for handle, tweets in sorted(list(getTweets(mode,path).items()), key=operator.itemgetter(1), reverse=True):
for handle, tweets in sorted(list(getTweets(mode=mode).items()), key=operator.itemgetter(1), reverse=True):
print(handle + "," + str(tweets))

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python3
import tools
import dbtools, setuptools
import sys, datetime
@ -20,10 +20,10 @@ def dateArgs(argv = sys.argv[1:]):
strings += [arg]
mode = 0
elif mode == 1:
fr = tools.getDate(arg)
fr = setuptools.getDate(arg)
mode = 0
else:
to = tools.getDate(arg)
to = setuptools.getDate(arg)
mode = 0
if mode in (1, 2):
@ -39,9 +39,7 @@ def queryBuilder(fr, to):
return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (fr, to)
def getDataByDate(fr, to, path = tools.dbpath(), headers = True):
db = tools.dbHelper(path)
def getDataByDate(fr, to, db = dbtools.dbHelper(), headers = True):
if fr == None:
fr = db.getFLDate()
if to == None:
@ -56,4 +54,4 @@ def getDataByDate(fr, to, path = tools.dbpath(), headers = True):
if __name__ == "__main__":
tools.printCSV(getDataByDate(*dateArgs(), headers = True))
setuptools.printCSV(getDataByDate(*dateArgs(), headers = True))

View file

@ -1,12 +1,10 @@
#!/usr/bin/env python3
import tools
import dbtools
import sys
def makeDB(path=tools.dbpath()):
db = tools.dbHelper(path, create = True)
def makeDB(db=dbtools.dbHelper()):
db.executeQuery("CREATE TABLE tweets(`tweet_id` INTEGER NOT NULL, `in_reply_to_status_id` TEXT, `in_reply_to_user_id` TEXT, `timestamp` TEXT, `source` TEXT, `text` TEXT, `retweeted_status_id` TEXT, `retweeted_status_user_id` TEXT, `retweeted_status_timestamp` TEXT, `expanded_urls` TEXT, PRIMARY KEY(tweet_id));")
db.commit()
@ -16,6 +14,6 @@ if __name__ == "__main__":
if len(sys.argv) > 2:
raise ValueError(sys.argv[0] + " only takes one argument, the path of the new database file.")
try:
makeDB(sys.argv[1])
makeDB(dbtools.dbObject(path=sys.argv[1]))
except IndexError:
makeDB()

View file

@ -1,20 +1,54 @@
import configparser, csv, datetime, itertools, os, sqlite3, sys, tweepy
import configparser, csv, datetime, html.parser, itertools, os, sqlite3, sys, tweepy
class SetupException(Exception):
def __str__(self):
return "Seems like config.cfg has not been created yet. Run setup.py to do so."
return "Seems like config.cfg has not been created yet or contains serious errors. Run setup.py to create it."
def getSetting(section, setting):
def getSetting(section, setting, path = "config.cfg"):
config = configparser.RawConfigParser()
config.read('config.cfg')
config.read(path)
return config.get(section, setting)
def dbtype():
    """Return the configured database type as an integer.

    Reads ``type`` from the ``Database`` section. If the setting cannot be
    read or is not an integer, 0 (SQLite3) is returned.
    """
    try:
        return int(getSetting("Database", "type"))
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must still propagate.
        return 0 # for SQLite3
### Must only be called AFTER dbtype()! ###
def dbhost():
    """Return the ``host`` setting of the ``Database`` section.

    Raises:
        SetupException: if the setting cannot be read.
    """
    try:
        return getSetting("Database", "host")
    except Exception as err:
        # Chain the cause so the real configuration problem stays visible.
        raise SetupException() from err
def dbuser():
    """Return the ``user`` setting of the ``Database`` section.

    Raises:
        SetupException: if the setting cannot be read.
    """
    try:
        return getSetting("Database", "user")
    except Exception as err:
        # Chain the cause so the real configuration problem stays visible.
        raise SetupException() from err
def dbpass():
    """Return the ``pass`` setting of the ``Database`` section.

    Raises:
        SetupException: if the setting cannot be read.
    """
    try:
        return getSetting("Database", "pass")
    except Exception as err:
        # Chain the cause so the real configuration problem stays visible.
        raise SetupException() from err
def dbname():
    """Return the ``name`` setting of the ``Database`` section.

    Raises:
        SetupException: if the setting cannot be read.
    """
    try:
        return getSetting("Database", "name")
    except Exception as err:
        # Chain the cause so the real configuration problem stays visible.
        raise SetupException() from err
def dbpath():
try:
return getSetting("Database", "path")
except:
return "Database.db"
return SetupException()
###
def cke():
try:
@ -40,64 +74,6 @@ def ase():
except:
raise SetupException()
def user():
return twObject().whoami()
class dbObject:
def __init__(self, path=dbpath()):
self.conn = sqlite3.connect(path)
self.cur = self.conn.cursor()
self.path = path
def closeConnection(self):
return self.conn.close()
def commit(self):
return self.conn.commit()
def executeQuery(self, query):
return self.cur.execute(query)
def getNext(self):
return self.cur.fetchone()
def isInitialized(self):
try:
self.executeQuery("SELECT * FROM tweets")
return True
except:
return False
def getFLDate(self, val = 0):
if val == 0:
mode = "MIN"
else:
mode = "MAX"
return getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0]))
class twObject:
def __init__(self, cke = cke(), cse = cse(), ato = ato(), ase = ase()):
self.auth = tweepy.OAuthHandler(cke, cse)
self.auth.set_access_token(ato, ase)
self.api = tweepy.API(self.auth)
def delete(self, id):
self.api.destroy_status(id)
def search(self, query, savepoint = 0):
tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items())
tweets.reverse()
return tweets
def whoami(self):
return self.auth.get_username()
def dbCheck(db, create = False):
if (not create and dbInitialized(db)) or (create and not dbInitialized(db)):
return True
@ -158,3 +134,6 @@ def printCSV(inlist):
writer = csv.writer(sys.stdout)
writer.writerows(inlist)
def unescapeText(text):
    """Decode HTML entities in ``text`` and double every single quote.

    The doubled quotes make the result usable inside a single-quoted SQL
    string literal, which is how callers embed it.
    """
    # html.unescape replaces HTMLParser().unescape, which was deprecated and
    # removed in Python 3.9. ``import html.parser`` also binds ``html``.
    return html.unescape(text).replace("'","''")

32
twitools/__init__.py Normal file
View file

@ -0,0 +1,32 @@
import tweepy, setuptools
class twObject:
    """Authenticated tweepy client bundled with a few convenience calls."""

    def __init__(self, cke = None, cse = None, ato = None, ase = None):
        """Authenticate against Twitter via OAuth.

        Args:
            cke, cse: passed to ``tweepy.OAuthHandler``.
            ato, ase: passed to ``set_access_token``.

        Any value left as None is read from the configuration through the
        matching ``setuptools`` getter when the object is created. As
        default-argument expressions these lookups ran once at import time.
        """
        if cke is None:
            cke = setuptools.cke()
        if cse is None:
            cse = setuptools.cse()
        if ato is None:
            ato = setuptools.ato()
        if ase is None:
            ase = setuptools.ase()

        self.auth = tweepy.OAuthHandler(cke, cse)
        self.auth.set_access_token(ato, ase)
        self.api = tweepy.API(self.auth)

    def delete(self, id):
        """Delete the status with the given id."""
        self.api.destroy_status(id)

    def search(self, query, savepoint = 0):
        """Return all search results for ``query`` in reversed cursor order.

        ``savepoint`` is passed to the search as ``since_id``.
        """
        tweets = list(tweepy.Cursor(self.api.search, q=query, since_id=savepoint).items())
        tweets.reverse()
        return tweets

    def whoami(self):
        """Return the user name of the authenticated account."""
        return self.auth.get_username()
def getFollowerIDs(two=None):
    ''' Returns 5,000 follower IDs at most

    ``two`` is the twObject to query; a new one is created when omitted.
    The account name is taken from that same object.
    '''
    # Build the default client at call time, not at import time.
    if two is None:
        two = twObject()
    return two.api.followers_ids(screen_name=two.whoami())
def getFollowingIDs(two=None):
    ''' Returns the IDs of the accounts the authenticated user follows

    ``two`` is the twObject to query; a new one is created when omitted.
    The account name is taken from that same object.
    '''
    # Build the default client at call time, not at import time.
    if two is None:
        two = twObject()
    return two.api.friends_ids(screen_name=two.whoami())
def getNamesByIDs(fids=None, two=None):
    ''' Yields the screen name for each of the given user IDs

    ``fids`` defaults to the follower IDs of ``two``; ``two`` defaults to a
    new twObject. IDs are looked up in pages of 100.
    '''
    # Resolve defaults lazily. As default-argument expressions they triggered
    # a Twitter API request as soon as the module was imported.
    if two is None:
        two = twObject()
    if fids is None:
        fids = getFollowerIDs(two)

    for page in setuptools.paginate(fids, 100):
        followers = two.api.lookup_users(user_ids=page)

        for follower in followers:
            yield follower.screen_name