"""Count tweets per calendar day, optionally per search term, as CSV.

For every day between a start and an end date (both inclusive) one row is
produced: the date, the total number of tweets on that day, and one further
count per search term.  Days without any tweets are included with a count
of 0.

Command line arguments (see dateArgs):
    -f YYYY-MM-DD   first day to report (default: earliest tweet)
    -t YYYY-MM-DD   last day to report (default: latest tweet)
    -a [N]          window size for a running average; parsed and passed
                    through, but not used by any calculation yet
    anything else   a search term to count separately
"""

import tools

import sys, datetime


def getDate(date):
    """Parse a YYYY-MM-DD string into a datetime.datetime.

    Raises:
        ValueError: if the string is not in YYYY-MM-DD format.
    """
    try:
        return datetime.datetime.strptime(date, '%Y-%m-%d')
    except ValueError:
        raise ValueError("Dates must be in YYYY-MM-DD format.")


def _parseWindow(arg):
    """Return arg as an int if it is one or is a string holding one, else None."""
    if isinstance(arg, int):
        return arg
    try:
        # Only strings are converted; int() on e.g. a float would truncate.
        return int(arg) if hasattr(arg, "strip") else None
    except ValueError:
        return None


# NOTE: the default is a snapshot of sys.argv taken when the module is loaded.
def dateArgs(argv = sys.argv[1:]):
    """Split command line arguments into search terms, date range and window.

    Args:
        argv: arguments without the program name.

    Returns:
        A tuple (strings, fr, to, av): the list of search terms, the start
        and end dates as datetime objects (None when not given) and the
        running average window (0 when not given).

    Raises:
        ValueError: if -f or -t is the last argument, if a date is not in
            YYYY-MM-DD format, or if the end date lies before the start date.
    """
    # Parser states: what the next argument is expected to be.
    ANY, FROM, TO, AVERAGE = 0, 1, 2, 3

    strings = []
    fr = None
    to = None
    av = 0
    mode = ANY

    for arg in argv:
        if mode == AVERAGE:
            # The value after -a is optional.  Command line arguments are
            # always strings, so they must be parsed rather than type-checked.
            mode = ANY
            window = _parseWindow(arg)
            if window is not None:
                av = window
                continue
            # Not a number: handle it like any other argument below.

        if mode == ANY:
            if arg == "-f":
                mode = FROM
            elif arg == "-t":
                mode = TO
            elif arg == "-a":
                mode = AVERAGE
            else:
                strings.append(arg)
        elif mode == FROM:
            fr = getDate(arg)
            mode = ANY
        else:
            to = getDate(arg)
            mode = ANY

    if mode in (FROM, TO):
        raise ValueError("Date missing.")

    if to is not None and fr is not None and to < fr:
        raise ValueError("From date must be before To date.")

    return strings, fr, to, av


def _sqlQuote(value):
    """Format value as text and double single quotes for use inside '...'."""
    return ("%s" % (value,)).replace("'", "''")


def queryBuilder(date, string = ""):
    """Build the query counting tweets on one day whose text contains string.

    Args:
        date: the day as a YYYY-MM-DD string.
        string: text to look for; the empty default matches any text.

    Returns:
        The SQL statement as a string.
    """
    # Both values end up inside SQL string literals; without escaping, a
    # search term containing an apostrophe would break (or alter) the query.
    # NOTE(review): parameterized queries would be preferable if
    # tools.dbHelper.executeQuery supports them - not visible from here.
    return "SELECT COUNT(*) FROM tweets WHERE SUBSTR(timestamp,0,11) = '%s' AND LOWER(text) LIKE '%%%s%%'" % (_sqlQuote(date), _sqlQuote(string))


def getFLDate(db, val = 0):
    """Return the date of the first (val == 0) or last (otherwise) tweet.

    Raises:
        ValueError: if the tweets table yields no date, or the stored value
            is not in YYYY-MM-DD format.
    """
    mode = "MIN" if val == 0 else "MAX"
    value = db.executeQuery("SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode)[0][0]

    # An aggregate over an empty table yields NULL; presumably the driver
    # maps that to None.  Report it instead of failing on the string "None".
    if value is None:
        raise ValueError("No tweets found in database.")

    return getDate(str(value))


def dateList(fr, to):
    """Return one single-element list [YYYY-MM-DD] per day from fr to to.

    Both ends are inclusive; the result is empty if to lies before fr.
    """
    days = (to + datetime.timedelta(days=1) - fr).days
    return [[(fr + datetime.timedelta(days=i)).strftime('%Y-%m-%d')] for i in range(days)]


def fillList(db, string, cur, av):
    """Append to every row of cur the tweet count for string on that day.

    The rows are extended in place with the first result row of the count
    query, and cur itself is returned.  av is accepted but not used yet.
    """
    # One query per day: simple, and days without tweets yield a count of 0.
    for day in cur:
        day += db.executeQuery(queryBuilder(day[0], string))[0]

    return cur


def getHeaders(strings, av):
    """Return the header row (wrapped in a list) for the given search terms.

    av is accepted but not used yet.
    """
    return [["Date", "Tweets"] + list(strings)]


# NOTE: the default for strings is never mutated, and the default for path is
# read from tools.config when the module is loaded.
def getTweetsByDate(strings = [], fr = None, to = None, av = 0, path = tools.config.dbpath, headers = False):
    """Return per-day tweet counts as a list of rows.

    Args:
        strings: search terms; each adds one count column.
        fr: first day as a datetime, or None for the earliest tweet.
        to: last day as a datetime, or None for the latest tweet.
        av: running average window; passed through but not used yet.
        path: database location handed to tools.dbHelper.
        headers: if true, prepend a header row.

    Returns:
        A list of rows [date, total, count per search term...].
    """
    db = tools.dbHelper(path)

    if fr is None:
        fr = getFLDate(db)
    if to is None:
        to = getFLDate(db, 1)

    cur = dateList(fr, to)

    # The empty term comes first and produces the "Tweets" total column.
    for string in [""] + list(strings):
        cur = fillList(db, string, cur, av)

    if headers:
        cur = getHeaders(strings, av) + cur

    return cur


if __name__ == "__main__":
    tools.printCSV(getTweetsByDate(*dateArgs(), headers = True))