From 79ba9cc96d74919f3e56df4ff7c5af55a0625a01 Mon Sep 17 00:00:00 2001 From: Klaus-Uwe Mitterer Date: Thu, 14 May 2015 16:29:55 +0200 Subject: [PATCH] Add gettweets.py to get Tweet contents (by dates) --- getdates.py | 23 ++++---------------- gettweets.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++ tools.py | 17 ++++++++++++++- 3 files changed, 79 insertions(+), 20 deletions(-) create mode 100755 gettweets.py diff --git a/getdates.py b/getdates.py index 421cb61..0c77141 100755 --- a/getdates.py +++ b/getdates.py @@ -4,12 +4,6 @@ import tools import sys, datetime -def getDate(date): - try: - return datetime.datetime.strptime(date, '%Y-%m-%d') - except ValueError: - raise ValueError("Dates must be in YYYY-MM-DD format.") - def dateArgs(argv = sys.argv[1:]): strings = [] fr = None @@ -36,10 +30,10 @@ def dateArgs(argv = sys.argv[1:]): raise ValueError("Number of days for running average must be an integer.") mode = 0 elif mode == 1: - fr = getDate(arg) + fr = tools.getDate(arg) mode = 0 else: - to = getDate(arg) + to = tools.getDate(arg) mode = 0 if mode in (1, 2): @@ -55,15 +49,6 @@ def queryBuilder(date, string = ""): return "SELECT COUNT(*) FROM tweets WHERE SUBSTR(timestamp,0,11) = '%s' AND LOWER(text) LIKE '%%%s%%'" % (date, string) -def getFLDate(db, val = 0): - if val == 0: - mode = "MIN" - else: - mode = "MAX" - - return getDate(str(list(db.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0])) - - def dateList(fr, to): return [[(fr+datetime.timedelta(days=i)).strftime('%Y-%m-%d')] for i in range((to+datetime.timedelta(days=1)-fr).days)] @@ -111,9 +96,9 @@ def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.con db = tools.dbHelper(path) if fr == None: - fr = getFLDate(db) + fr = db.getFLDate() if to == None: - to = getFLDate(db, 1) + to = db.getFLDate(1) cur = dateList(fr, to) diff --git a/gettweets.py b/gettweets.py new file mode 100755 index 0000000..23f1d88 --- /dev/null +++ b/gettweets.py @@ 
-0,0 +1,83 @@
+#!/usr/bin/env python3
+
+import tools
+
+import sys, datetime
+
+def dateArgs(argv = sys.argv[1:]):
+    """Parse the optional -f FROM and -t TO date options.
+
+    Returns a (fr, to) tuple holding whatever tools.getDate returned for each
+    given date; a bound that was not given is None.
+
+    Raises ValueError for an unknown argument, for an option without a
+    date, or when the To date lies before the From date.
+    """
+    fr = None
+    to = None
+
+    # mode: 0 = expecting an option, 1 = expecting the -f date, 2 = expecting the -t date
+    mode = 0
+
+    for arg in argv:
+        if mode == 0:
+            if arg == "-f":
+                mode = 1
+            elif arg == "-t":
+                mode = 2
+            else:
+                # This tool takes no search strings; fail clearly instead of
+                # appending to a list that does not exist in this function.
+                raise ValueError("Unknown argument: %s" % arg)
+        elif mode == 1:
+            fr = tools.getDate(arg)
+            mode = 0
+        else:
+            to = tools.getDate(arg)
+            mode = 0
+
+    if mode in (1, 2):
+        raise ValueError("Date missing.")
+
+    if to is not None and fr is not None and to < fr:
+        raise ValueError("From date must be before To date.")
+
+    return fr, to
+
+
+def dayString(date):
+    """Return date as a 'YYYY-MM-DD' string (non-date values go through str())."""
+    if isinstance(date, datetime.date):
+        return date.strftime('%Y-%m-%d')
+    return str(date)
+
+
+def queryBuilder(fr, to):
+    """Build the SELECT statement for all tweets from fr to to, both inclusive."""
+    # The bounds must be bare dates: str() of a datetime appends " 00:00:00",
+    # which sorts after the date prefix returned by SUBSTR(timestamp,0,11)
+    # and would therefore drop every tweet of the first day.
+    return "SELECT * FROM tweets WHERE SUBSTR(timestamp,0,11) >= '%s' AND SUBSTR(timestamp,0,11) <= '%s'" % (dayString(fr), dayString(to))
+
+
+def getDataByDate(fr, to, path = tools.config.dbpath, headers = True):
+    """Return the rows of all tweets between fr and to as a list.
+
+    A bound that is None is replaced by db.getFLDate() (From) or
+    db.getFLDate(1) (To).
+    """
+    db = tools.dbHelper(path)
+
+    if fr is None:
+        fr = db.getFLDate()
+    if to is None:
+        to = db.getFLDate(1)
+
+    cur = list(db.executeQuery(queryBuilder(fr, to)))
+
+    if headers:
+        # TODO: header row is not implemented yet; the flag currently has no effect.
+        pass
+
+    return cur
+
+
+if __name__ == "__main__":
+    tools.printCSV(getDataByDate(*dateArgs(), headers = True))
diff --git a/tools.py b/tools.py
index aab8158..a3863b3 100644
--- a/tools.py
+++ b/tools.py
@@ -1,6 +1,6 @@
 import config
 
-import csv, os, sqlite3, sys, tweepy
+import csv, datetime, os, sqlite3, sys, tweepy
 
 
 class dbObject:
@@ -28,6 +28,15 @@ class dbObject:
         except:
             return False
 
+    def getFLDate(self, val = 0):
+        """Return the smallest (val == 0) or largest (any other val) SUBSTR(timestamp,0,11) in tweets, parsed by getDate."""
+        if val == 0:
+            mode = "MIN"
+        else:
+            mode = "MAX"
+
+        return getDate(str(list(self.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode))[0][0]))
+
 
 class twObject:
 
@@ -64,6 +73,14 @@ def fileExists(path):
     return os.path.isfile(path)
 
 
+def getDate(date):
+    """Parse a 'YYYY-MM-DD' string into a datetime.datetime; raise ValueError for any other format."""
+    try:
+        return datetime.datetime.strptime(date, '%Y-%m-%d')
+    except ValueError:
+        raise ValueError("Dates must be in YYYY-MM-DD format.")
+
+
 def parseArgs(argv):
     args = []
     path = None