twitools/filler.py

55 lines
1.4 KiB
Python
Raw Normal View History

2015-04-13 20:58:32 +00:00
#!/usr/bin/env python3
2015-03-09 17:32:24 +00:00
# -*- coding: utf-8 -*-
import config, html.parser, sqlite3, time, tweepy, os
2015-03-09 17:32:24 +00:00
def fill(user=config.user, dbpath=config.dbpath):
    """Append a user's tweets newer than the stored maximum to the database.

    The highest ``tweet_id`` already present in the ``tweets`` table is used
    as the ``since_id`` of a ``from:<user>`` search, and every status returned
    is inserted as a ``(tweet_id, timestamp, text)`` row.

    Args:
        user: Account name placed after ``from:`` in the search query.
        dbpath: Path of the SQLite database holding the ``tweets`` table.

    Returns:
        A tuple ``(count, last, savepoint)``: the number of rows inserted,
        the id of the last status inserted (equal to ``savepoint`` when
        nothing was inserted), and the highest ``tweet_id`` found before
        this run (``0`` for an empty table).

    Raises:
        IOError: If the ``tweets`` table cannot be queried.
    """
    search = "from:" + user

    conn = sqlite3.connect(dbpath)
    try:
        cur = conn.cursor()

        try:
            cur.execute("SELECT max(tweet_id) FROM tweets")
        except sqlite3.Error as err:
            raise IOError("Database file " + dbpath + " does not exist or is in the wrong format. Please create it before trying to populate it.") from err

        # max() over an empty table yields NULL, which int() rejects.
        try:
            savepoint = int(cur.fetchone()[0])
        except (TypeError, ValueError):
            savepoint = 0

        auth = tweepy.OAuthHandler(config.cke, config.cse)
        auth.set_access_token(config.ato, config.ase)
        api = tweepy.API(auth)

        # NOTE(review): newer tweepy releases expose this endpoint as
        # ``search_tweets`` -- confirm against the installed version.
        timeline = list(tweepy.Cursor(api.search, q=search, since_id=savepoint).items())
        # Insert in the opposite order to the one the search returned.
        timeline.reverse()

        rows = [
            (
                status.id,
                status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000",
                html.unescape(status.text),
            )
            for status in timeline
        ]

        # Placeholders let sqlite3 handle quoting of the tweet text, so no
        # manual escaping of apostrophes is needed.
        cur.executemany(
            "INSERT INTO tweets (tweet_id, timestamp, text) VALUES (?, ?, ?)",
            rows,
        )
        conn.commit()
    finally:
        # Release the database handle even if the API call or an insert fails.
        conn.close()

    last = rows[-1][0] if rows else savepoint
    return len(rows), last, savepoint
2015-03-09 17:32:24 +00:00
if __name__ == "__main__":
    # Run one import pass and report the number of tweets stored together
    # with the id range they cover.
    stored, newest_id, previous_id = fill()
    summary = "Stored %i tweets after %i until %i." % (stored, previous_id, newest_id)
    print(summary)