Add gettweets.py to get Tweet contents (by dates)

This commit is contained in:
Klaus-Uwe Mitterer 2015-05-14 16:29:55 +02:00
parent c66931d713
commit 79ba9cc96d
3 changed files with 79 additions and 20 deletions

View file

@ -4,12 +4,6 @@ import tools
import sys, datetime
def getDate(date):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime.datetime``.

    Raises:
        ValueError: If ``date`` does not match the ``YYYY-MM-DD`` format.
    """
    expected_format = '%Y-%m-%d'
    try:
        parsed = datetime.datetime.strptime(date, expected_format)
    except ValueError:
        # Swap strptime's technical message for one aimed at the user.
        raise ValueError("Dates must be in YYYY-MM-DD format.")
    else:
        return parsed
def dateArgs(argv = sys.argv[1:]):
strings = []
fr = None
@ -36,10 +30,10 @@ def dateArgs(argv = sys.argv[1:]):
raise ValueError("Number of days for running average must be an integer.")
mode = 0
elif mode == 1:
fr = getDate(arg)
fr = tools.getDate(arg)
mode = 0
else:
to = getDate(arg)
to = tools.getDate(arg)
mode = 0
if mode in (1, 2):
@ -55,15 +49,6 @@ def queryBuilder(date, string = ""):
return "SELECT COUNT(*) FROM tweets WHERE SUBSTR(timestamp,0,11) = '%s' AND LOWER(text) LIKE '%%%s%%'" % (date, string)
def getFLDate(db, val = 0):
    """Return the earliest or latest tweet date stored in the database.

    Args:
        db: Object whose ``executeQuery`` method runs the SQL statement.
        val: 0 queries the minimum date; any other value queries the maximum.

    Returns:
        The queried value converted to a string and passed through
        ``getDate``.
    """
    aggregate = "MIN" if val == 0 else "MAX"
    query = "SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % aggregate
    rows = list(db.executeQuery(query))
    # The aggregate sits in the first column of the first row.
    return getDate(str(rows[0][0]))
def dateList(fr, to):
    """List every day from ``fr`` through ``to`` as one-element rows.

    Args:
        fr: First day of the range (supports ``timedelta`` arithmetic).
        to: Last day of the range, included in the result.

    Returns:
        A list of single-item lists, each holding one day formatted as
        ``YYYY-MM-DD``. The list is empty when ``to`` precedes ``fr``.
    """
    one_day = datetime.timedelta(days=1)
    # Adding one day before subtracting makes the end date inclusive.
    day_count = (to + one_day - fr).days
    rows = []
    for offset in range(day_count):
        current = fr + offset * one_day
        rows.append([current.strftime('%Y-%m-%d')])
    return rows
@ -111,9 +96,9 @@ def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.con
db = tools.dbHelper(path)
if fr == None:
fr = getFLDate(db)
fr = db.getFLDate()
if to == None:
to = getFLDate(db, 1)
to = db.getFLDate(1)
cur = dateList(fr, to)

59
gettweets.py Executable file
View file

@ -0,0 +1,59 @@
#!/usr/bin/env python3
import tools
import sys, datetime
def dateArgs(argv = sys.argv[1:]):
    """Parse the ``-f``/``-t`` date options from a list of arguments.

    Args:
        argv: Argument strings to parse; defaults to the process arguments
            captured when this module was loaded.

    Returns:
        A ``(fr, to)`` tuple holding whatever ``tools.getDate`` returned for
        the values following ``-f`` and ``-t``. An element is ``None`` when
        its option was not given.

    Raises:
        ValueError: If an option is not followed by a value, if an argument
            is neither an option nor an option's value, or if the "to" date
            lies before the "from" date. Errors raised by ``tools.getDate``
            propagate unchanged.
    """
    fr = None
    to = None
    pending = None  # option still waiting for its value, if any

    for arg in argv:
        if pending == "-f":
            fr = tools.getDate(arg)
            pending = None
        elif pending == "-t":
            to = tools.getDate(arg)
            pending = None
        elif arg in ("-f", "-t"):
            pending = arg
        else:
            # This script takes no search strings, so anything that is not
            # an option or an option's value is a usage error.
            raise ValueError("Unexpected argument: %s" % arg)

    if pending is not None:
        raise ValueError("Date missing.")

    if to is not None and fr is not None and to < fr:
        raise ValueError("From date must be before To date.")

    return fr, to
def queryBuilder(fr, to):
    """Build the SQL selecting all tweets between two days, both inclusive.

    Args:
        fr: First day to include, as a ``datetime.date`` or
            ``datetime.datetime`` object or a ``YYYY-MM-DD`` string.
        to: Last day to include, in the same forms as ``fr``.

    Returns:
        The SQL statement as a string.
    """
    def day(value):
        # The query compares against the date prefix of the timestamp
        # column. A datetime interpolated with %s renders as
        # 'YYYY-MM-DD HH:MM:SS', which sorts after the bare date and would
        # exclude the entire first day, so reduce it to its date part.
        if isinstance(value, datetime.date):
            return value.strftime('%Y-%m-%d')
        return value

    return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (day(fr), day(to))
def getDataByDate(fr, to, path = tools.config.dbpath, headers = True):
    """Return every tweet row stored between two dates.

    Args:
        fr: Lower date bound; when ``None`` the value of ``db.getFLDate()``
            is used instead.
        to: Upper date bound; when ``None`` the value of ``db.getFLDate(1)``
            is used instead.
        path: Database location handed to ``tools.dbHelper``.
        headers: Accepted for the caller's convenience; it currently has no
            effect on the result.

    Returns:
        A list of the rows produced by the query from ``queryBuilder``.
    """
    db = tools.dbHelper(path)
    lower = db.getFLDate() if fr is None else fr
    upper = db.getFLDate(1) if to is None else to
    query = queryBuilder(lower, upper)
    return list(db.executeQuery(query))
if __name__ == "__main__":
    # Script entry point: read the date range from the command line, fetch
    # the matching rows and hand them to tools.printCSV.
    fr, to = dateArgs()
    rows = getDataByDate(fr, to, headers = True)
    tools.printCSV(rows)

View file

@ -1,6 +1,6 @@
import config
import csv, os, sqlite3, sys, tweepy
import csv, datetime, os, sqlite3, sys, tweepy
class dbObject:
@ -28,6 +28,14 @@ class dbObject:
except:
return False
def getFLDate(self, val = 0):
    """Return the earliest or latest tweet date stored in the database.

    Args:
        val: 0 queries the minimum date; any other value queries the maximum.

    Returns:
        The queried value converted to a string and passed through
        ``getDate``.
    """
    aggregate = "MIN" if val == 0 else "MAX"
    query = "SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % aggregate
    rows = list(self.executeQuery(query))
    # The aggregate sits in the first column of the first row.
    return getDate(str(rows[0][0]))
class twObject:
@ -64,6 +72,13 @@ def fileExists(path):
return os.path.isfile(path)
def getDate(date):
    """Convert a ``YYYY-MM-DD`` string into a ``datetime.datetime``.

    Raises:
        ValueError: If ``date`` does not match the ``YYYY-MM-DD`` format.
    """
    try:
        parsed = datetime.datetime.strptime(date, '%Y-%m-%d')
    except ValueError:
        # Report the expected format rather than strptime's own message.
        raise ValueError("Dates must be in YYYY-MM-DD format.")
    return parsed
def parseArgs(argv):
args = []
path = None