#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Append a user's newest tweets to the ``tweets`` table of a SQLite database.

Run as a script, this calls :func:`fill` with the defaults taken from the
``config`` module and prints a one-line summary.
"""

import html.parser
import os
import sqlite3
import time

import tweepy

import config


def fill(user=config.user, dbpath=config.dbpath):
    """Fetch tweets newer than the stored maximum id and insert them.

    The highest ``tweet_id`` already present in the ``tweets`` table is used
    as ``since_id`` for a ``from:<user>`` search, so only tweets not yet
    stored are requested. Each result is written as a
    ``(tweet_id, timestamp, text)`` row and everything is committed in a
    single transaction at the end.

    Args:
        user: Screen name whose tweets are searched for (``from:<user>``).
        dbpath: Path of the SQLite database file containing a ``tweets``
            table with ``tweet_id``, ``timestamp`` and ``text`` columns.

    Returns:
        A tuple ``(count, last, savepoint)``: the number of rows inserted,
        the id of the last row inserted (equal to ``savepoint`` when nothing
        was inserted), and the highest tweet id that was stored before this
        call (``0`` when the table was empty).

    Raises:
        IOError: If the ``tweets`` table cannot be queried, e.g. because the
            database has not been created yet or has a different schema.
    """
    search = "from:" + user

    conn = sqlite3.connect(dbpath)
    try:
        cur = conn.cursor()

        try:
            cur.execute("SELECT max(tweet_id) FROM tweets")
        except sqlite3.Error as err:
            raise IOError("Database file " + dbpath + " does not exist or is in the wrong format. Please create it before trying to populate it.") from err

        # max() over an empty table yields NULL (None); start from 0 then.
        try:
            savepoint = int(cur.fetchone()[0])
        except (TypeError, ValueError):
            savepoint = 0
        last = savepoint

        auth = tweepy.OAuthHandler(config.cke, config.cse)
        auth.set_access_token(config.ato, config.ase)
        api = tweepy.API(auth)

        # tweepy 4 renamed API.search to API.search_tweets; support both.
        search_call = getattr(api, "search_tweets", None) or api.search
        timeline = list(
            tweepy.Cursor(search_call, q=search, since_id=savepoint).items()
        )
        # Insert in the reverse of the order the API returned, so that `last`
        # ends up as the id of the first result (presumably the newest tweet
        # -- confirm against the search API's ordering).
        timeline.reverse()

        tw_counter = 0
        for status in timeline:
            timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
            # html.unescape replaces HTMLParser.unescape (removed in 3.9).
            text = html.unescape(status.text)
            # Bound parameters instead of string-built SQL: tweet text is
            # untrusted input and must never be spliced into the statement.
            cur.execute(
                'INSERT INTO tweets("tweet_id", "timestamp", "text") '
                'VALUES (?, ?, ?)',
                (status.id, timestamp, text),
            )
            last = status.id
            tw_counter += 1

        conn.commit()
    finally:
        # Release the database handle on success and on every error path.
        conn.close()

    return tw_counter, last, savepoint


if __name__ == "__main__":
    count, last, first = fill()
    print("Stored %i tweets after %i until %i." % (count, first, last))