commit 89bccfba67da2f29a57dad51fb47a52646913eb3
Author: Klaus-Uwe Mitterer
Date:   Mon Mar 9 18:32:24 2015 +0100

    Check in Twitter Database Tools

diff --git a/datecsv.sh b/datecsv.sh
new file mode 100755
index 0000000..75ac1b0
--- /dev/null
+++ b/datecsv.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+#
+# Write a per-day CSV of tweet counts from the tweets table in $DATAFILE.
+# Every command-line argument is a keyword; each one adds a column holding
+# the number of that day's tweets whose text contains the keyword.
+
+DATAFILE=Database.db
+OUTFILE=output.csv
+
+# Kept as an array so the options survive word splitting intact.
+SQLITE=(sqlite3 -csv -header)
+
+# SUBSTR(timestamp,0,11) is the per-day key (the date prefix of the timestamp).
+QUERY="SELECT SUBSTR(t.timestamp,0,11) AS 'Date', (SELECT COUNT(*) FROM tweets e WHERE SUBSTR(e.timestamp,0,11) = SUBSTR(t.timestamp,0,11)) AS 'Tweets'"
+
+SQ="'"
+
+# "$@" is quoted so a keyword containing spaces stays a single argument.
+for i in "$@"; do
+  # Lower-case the keyword and double any single quote, so it cannot
+  # terminate the SQL string literal it is embedded in.
+  keyword=${i,,}
+  keyword=${keyword//$SQ/$SQ$SQ}
+  label=${i//$SQ/$SQ$SQ}
+  QUERY="$QUERY, (SELECT COUNT(*) FROM tweets e WHERE SUBSTR(e.timestamp,0,11) = SUBSTR(t.timestamp,0,11) AND LOWER(e.text) LIKE '%${keyword}%') AS '${label}'"
+done
+
+QUERY="$QUERY FROM tweets t GROUP BY SUBSTR(t.timestamp,0,11);"
+
+# Quoted so the shell neither glob-expands nor re-splits the query text.
+echo "$QUERY"
+
+"${SQLITE[@]}" "$DATAFILE" "$QUERY" > "$OUTFILE"
diff --git a/filler.py b/filler.py
new file mode 100755
index 0000000..03f91b7
--- /dev/null
+++ b/filler.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Append tweets newer than the newest stored one to the SQLite database."""
+
+import HTMLParser, sqlite3, time, tweepy, os
+
+user = "Username"
+cke = "Consumer Key"
+cse = "Consumer Secret"
+ato = "Access Token"
+ase = "Access Secret"
+
+search = "from:" + user
+
+database_filename = "Database.db"
+
+sql_conn = sqlite3.connect(database_filename)
+
+try:
+    cur = sql_conn.cursor()
+
+    # max() yields NULL (None) while the table is still empty, e.g. right
+    # after makedb.py created it; in that case search without a lower bound.
+    cur.execute("SELECT max(tweet_id) FROM tweets")
+    newest = cur.fetchone()[0]
+    savepoint = int(newest) if newest is not None else None
+
+    print savepoint
+
+    auth = tweepy.OAuthHandler(cke, cse)
+    auth.set_access_token(ato, ase)
+    api = tweepy.API(auth)
+
+    search_args = {"q": search}
+    if savepoint is not None:
+        search_args["since_id"] = savepoint
+
+    # The result set is consumed completely and then reversed, so rows are
+    # inserted in the opposite order to the one the API returned them in.
+    timeline = list(tweepy.Cursor(api.search, **search_args).items())
+    timeline.reverse()
+
+    unescaper = HTMLParser.HTMLParser()
+    tw_counter = 0
+
+    for status in timeline:
+        print "(%(date)s) %(name)s: %(message)s\n" % \
+            { "date" : status.created_at,
+              "name" : status.author.screen_name.encode('utf-8'),
+              "message" : status.text.encode('utf-8') }
+
+        timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
+        text = unescaper.unescape(status.text)
+
+        # Bound parameters instead of string concatenation: tweet text is
+        # untrusted input and must never be spliced into the SQL statement.
+        cur.execute("INSERT INTO tweets(tweet_id, timestamp, text) VALUES (?, ?, ?)",
+                    (status.id, timestamp, text))
+        tw_counter += 1
+
+    sql_conn.commit()
+finally:
+    # Release the database handle even when the API or an insert fails.
+    sql_conn.close()
+
+print "Finished. %d Tweets stored" % (tw_counter)
diff --git a/makedb.py b/makedb.py
new file mode 100755
index 0000000..ccfd3e8
--- /dev/null
+++ b/makedb.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Create a fresh, empty tweets database, replacing any existing file."""
+
+import os, sqlite3, sys
+
+# Optional first argument: path of the database file to (re)create.
+db_path = sys.argv[1] if len(sys.argv) > 1 else "Database.db"
+
+# Start from scratch: an existing database file is deleted, not migrated.
+if os.path.isfile(db_path):
+    os.remove(db_path)
+
+conn = sqlite3.connect(db_path)
+
+try:
+    conn.execute("CREATE TABLE tweets(tweet_id numeric, timestamp text, text text);")
+    conn.commit()
+finally:
+    conn.close()