#!/usr/bin/env python3
"""Count tweets per day, optionally filtered by search strings, as CSV rows.

Command line: [-f YYYY-MM-DD] [-t YYYY-MM-DD] [-a N] [search string ...]
"""
import datetime
import sys

import tools


def getDate(date):
    """Parse a 'YYYY-MM-DD' string into a datetime.datetime.

    Raises:
        ValueError: if *date* does not match the YYYY-MM-DD format.
    """
    try:
        return datetime.datetime.strptime(date, '%Y-%m-%d')
    except ValueError as err:
        raise ValueError("Dates must be in YYYY-MM-DD format.") from err


def dateArgs(argv=None):
    """Split command-line arguments into search strings and options.

    Recognised options:
        -f DATE   start date (YYYY-MM-DD)
        -t DATE   end date (YYYY-MM-DD)
        -a [N]    optional integer stored as ``av``; when the argument after
                  ``-a`` is not an integer it is handled as a normal argument
                  and ``av`` keeps its default of 0.

    Every other argument is collected as a search string.

    Args:
        argv: argument list; defaults to ``sys.argv[1:]`` read at call time.

    Returns:
        Tuple ``(strings, fr, to, av)``; ``fr`` and ``to`` are datetimes or
        None when the corresponding option was not given.

    Raises:
        ValueError: on a malformed date, a trailing -f/-t without a date, or
            a To date earlier than the From date.
    """
    if argv is None:
        argv = sys.argv[1:]

    strings = []
    fr = None
    to = None
    av = 0
    pending = None  # option flag still waiting for its value, if any

    for arg in argv:
        if pending == "-f":
            fr = getDate(arg)
            pending = None
            continue
        if pending == "-t":
            to = getDate(arg)
            pending = None
            continue
        if pending == "-a":
            pending = None
            # Command-line arguments are always strings, so the value has to
            # be parsed; an isinstance(arg, int) check can never succeed.
            try:
                av = int(arg)
            except (TypeError, ValueError):
                pass  # not a number: fall through and treat it normally
            else:
                continue

        if arg in ("-f", "-t", "-a"):
            pending = arg
        else:
            strings.append(arg)

    if pending in ("-f", "-t"):
        raise ValueError("Date missing.")
    if fr is not None and to is not None and to < fr:
        raise ValueError("From date must be before To date.")
    return strings, fr, to, av


def queryBuilder(date, string=""):
    """Build the SQL that counts tweets on *date* whose text contains *string*.

    Single quotes in both values are doubled so they cannot terminate the
    SQL string literal. LIKE wildcards (``%``, ``_``) inside *string* are
    left untouched and keep their pattern meaning.
    """
    safe_date = str(date).replace("'", "''")
    safe_string = str(string).replace("'", "''")
    return (
        "SELECT COUNT(*) FROM tweets WHERE SUBSTR(timestamp,0,11) = '%s' "
        "AND LOWER(text) LIKE '%%%s%%'" % (safe_date, safe_string)
    )


def getFLDate(db, val=0):
    """Return the first (``val == 0``) or last (any other ``val``) tweet date.

    The value comes from MIN/MAX over the date prefix of ``timestamp`` and is
    parsed with getDate, so a result that is not YYYY-MM-DD raises ValueError.
    NOTE(review): presumably an empty ``tweets`` table ends up here as the
    text "None" and triggers that ValueError -- confirm against dbHelper.
    """
    mode = "MIN" if val == 0 else "MAX"
    rows = db.executeQuery(
        "SELECT %s(SUBSTR(timestamp,0,11)) FROM tweets" % mode
    )
    return getDate(str(rows[0][0]))


def dateList(fr, to):
    """Return ``[['YYYY-MM-DD'], ...]`` for every day from *fr* to *to* inclusive.

    Each day is wrapped in its own list so counts can be appended to it later.
    The result is empty when *to* lies before *fr*.
    """
    day_count = (to + datetime.timedelta(days=1) - fr).days
    return [
        [(fr + datetime.timedelta(days=offset)).strftime('%Y-%m-%d')]
        for offset in range(day_count)
    ]


def fillList(db, string, cur, av):
    """Append the tweet count matching *string* to every day row in *cur*.

    Rows are extended in place with the first result row of the count query;
    *cur* is also returned for convenience. *av* is accepted but not used.
    """
    for day in cur:
        day += db.executeQuery(queryBuilder(day[0], string))[0]
    return cur


def getHeaders(strings, av):
    """Return the header row (wrapped in a list) for the table; *av* is unused."""
    return [["Date", "Tweets"] + list(strings)]


def getTweetsByDate(strings=None, fr=None, to=None, av=0,
                    path=tools.config.dbpath, headers=False):
    """Build a per-day table of tweet counts.

    Each row is ``[date, total, count for strings[0], ...]``.

    Args:
        strings: search strings, one extra count column each (default: none).
        fr: first day as a datetime; defaults to the earliest date in the db.
        to: last day as a datetime; defaults to the latest date in the db.
        av: forwarded to fillList/getHeaders, which currently ignore it.
        path: database path handed to ``tools.dbHelper``.
        headers: when true, prepend a header row.

    Returns:
        List of rows, optionally preceded by the header row.
    """
    strings = [] if strings is None else list(strings)
    db = tools.dbHelper(path)
    if fr is None:
        fr = getFLDate(db)
    if to is None:
        to = getFLDate(db, 1)

    cur = dateList(fr, to)
    # The empty string matches every tweet and yields the "Tweets" total column.
    for string in [""] + strings:
        cur = fillList(db, string, cur, av)
    if headers:
        cur = getHeaders(strings, av) + cur
    return cur


if __name__ == "__main__":
    tools.printCSV(getTweetsByDate(*dateArgs(), headers=True))