#!/usr/bin/env python3
"""Generate a Markov-chain sentence from stored tweets and post it."""

# NOTE: import order is kept as in the original file; the project-local
# modules come first in case any of them has import-time side effects.
import dbtools
import setuptools
import twitools
import filters.markov

import argparse
import datetime
import html
import markovify
import multiprocessing
import nltk
import operator
import os
import random
import re
import string
import sys
import time

# Maximum time the worker process may run before it is terminated.
WORKER_TIMEOUT_SECONDS = 20 * 60


class Possy(markovify.NewlineText):
    """Newline-delimited Markov model whose states are POS-tagged words."""

    def word_split(self, sentence):
        """Split *sentence* into a list of ``word::TAG`` tokens.

        The sentence is split with ``self.word_split_pattern`` and each
        resulting word is paired with the tag returned by ``nltk.pos_tag``.
        """
        words = re.split(self.word_split_pattern, sentence)
        return ["::".join(tag) for tag in nltk.pos_tag(words)]

    def word_join(self, words):
        """Join ``word::TAG`` tokens back into a plain sentence, dropping tags."""
        return " ".join(word.split("::")[0] for word in words)


def sanitizeText(text):
    """Strip leading/trailing mentions and retweet prefixes from *text*.

    The rules are applied recursively until none matches:

    * if the first or second character is ``@`` and the second word does
      not start with an ASCII lowercase letter, the first word is dropped;
    * if the last word starts with ``@`` and the text ends with a digit,
      an ASCII letter or ``_``, the last word is dropped;
    * if the text starts with ``RT @``, only what follows the first ``:``
      is kept.

    Returns the cleaned text, or ``""`` when the text is too short to be
    inspected (any index lookup fails).
    """
    words = text.split()
    handle_chars = string.digits + string.ascii_letters + "_"
    try:
        if text[0] == "@" or text[1] == "@":
            if words[1][0] not in string.ascii_lowercase:
                return sanitizeText(" ".join(words[1:]))
        if words[-1][0] == "@" and text[-1] in handle_chars:
            return sanitizeText(" ".join(words[:-1]))
        if text[:4] == "RT @":
            return sanitizeText(text.partition(":")[2])
    except IndexError:
        # Empty or very short input: nothing usable remains.
        return ""
    return text


def getText(db=None):
    """Return the sanitized text of all stored tweets, one per line.

    Args:
        db: database helper exposing ``executeQuery``; when ``None`` a new
            ``dbtools.dbHelper()`` is created for this call (rather than
            once at import time, as a default argument value would be).

    Returns:
        The HTML-unescaped corpus with blank lines removed.
    """
    if db is None:
        db = dbtools.dbHelper()
    rows = db.executeQuery('SELECT text FROM tweets;')
    corpus = "".join(sanitizeText(row[0]) + "\n" for row in rows)
    kept_lines = [
        line for line in corpus.strip().splitlines(True) if line.strip()
    ]
    return html.unescape("".join(kept_lines))


def markovifyText(text):
    """Generate a sentence of at most 270 characters from *text*.

    Returns:
        The generated sentence with every ``@`` replaced, or ``""`` when
        the model could not produce a sentence (``make_short_sentence``
        returns ``None`` in that case).
    """
    sentence = Possy(text).make_short_sentence(270)
    if sentence is None:
        return ""
    # NOTE: the replacement literal holds an invisible character after "@"
    # (presumably a zero-width space, so the output does not mention
    # anyone) -- keep it byte-for-byte.
    return sentence.replace("@", "@​")


def worker():
    """Build a sentence, filter it and tweet it in the MARKOV section."""
    text = markovifyText(getText())
    if not text:
        # No sentence could be generated; nothing to filter or post.
        return
    outtext = filters.markov.textFilter(text)
    if outtext:
        filters.markov.tweetFilter(
            twitools.tweet(outtext, section=setuptools.MARKOV), text
        )


if __name__ == "__main__":
    p = multiprocessing.Process(target=worker)
    p.start()
    # Wait for the worker, but no longer than the timeout; returning as
    # soon as it finishes avoids idling for the full period every run.
    p.join(WORKER_TIMEOUT_SECONDS)
    if p.is_alive():
        p.terminate()
        p.join()