#!/usr/bin/env python3
"""Generate a Markov-chain sentence from stored tweets and post it.

When run as a script, the text of every row in the ``tweets`` table is
cleaned, fed to a part-of-speech aware markovify model, and the generated
sentence is handed to ``twitools.tweet``.
"""
import dbtools
import setuptools
import twitools
import argparse
import html
import markovify
import nltk
import operator
import random
import re
import string
import sys

# Upper bound on the generated sentence length passed to markovify.
_MAX_SENTENCE_CHARS = 130
# How many times to ask the model for a sentence before giving up.
_MAX_GENERATION_ATTEMPTS = 10


class Possy(markovify.NewlineText):
    """Newline-delimited markovify model whose tokens carry a POS tag.

    Tokens are stored internally as ``word::TAG`` so that the chain's state
    includes the part of speech reported by ``nltk.pos_tag``; the tag is
    removed again when a sentence is rendered.
    """

    def word_split(self, sentence):
        """Split *sentence* and return its words as ``word::TAG`` strings."""
        words = re.split(self.word_split_pattern, sentence)
        # nltk.pos_tag yields (word, tag) pairs; fuse each pair into one token.
        return ["::".join(tagged) for tagged in nltk.pos_tag(words)]

    def word_join(self, words):
        """Join tagged tokens into a plain sentence, dropping the tags."""
        return " ".join(word.split("::")[0] for word in words)


def sanitizeText(text):
    """Strip leading/trailing mentions and retweet prefixes from a tweet.

    The rules are applied recursively until none matches:

    * If "@" is the first or second character and the second word does not
      start with a lowercase ASCII letter, everything up to and including
      the first space is removed.
    * If the last word starts with "@", that word is removed.
    * If the text starts with ``"RT @"``, everything up to and including the
      first ":" is removed.

    Returns the cleaned text, or ``""`` when the text is too short for the
    checks above (fewer than two characters, or no second word where one is
    needed).
    """
    split = text.split()
    try:
        if "@" in (text[0], text[1]):
            if split[1][0] not in string.ascii_lowercase:
                return sanitizeText(text.partition(" ")[2])
        if split[-1][0] == "@":
            return sanitizeText(" ".join(split[:-1]))
        if text[:4] == "RT @":
            return sanitizeText(text.partition(":")[2])
    except IndexError:
        # Indexing is the only operation above that can fail; such texts
        # are treated as having no usable content.
        return ""
    return text


def getText(db=None):
    """Return the sanitized, HTML-unescaped text of all stored tweets.

    Args:
        db: Object exposing ``executeQuery(sql)`` that yields rows whose
            first column is the tweet text. When omitted, a
            ``dbtools.dbHelper()`` is created on demand (rather than at
            import time).

    Returns:
        One string with the non-blank lines of every sanitized tweet, in
        query order.
    """
    if db is None:
        db = dbtools.dbHelper()
    rows = db.executeQuery('SELECT text FROM tweets;')
    corpus = "\n".join(sanitizeText(row[0]) for row in rows)
    # keepends=True preserves the original line terminators; lines that are
    # empty or whitespace-only (e.g. fully sanitized tweets) are dropped.
    kept = [line for line in corpus.strip().splitlines(True) if line.strip()]
    return html.unescape("".join(kept))


def markovifyText(text):
    """Generate one short sentence from *text* with the ``Possy`` model.

    Every "@" in the result is followed by the invisible character embedded
    in the replacement literal below (it appears to be a zero-width space;
    presumably this keeps generated mentions from notifying real accounts).

    Raises:
        RuntimeError: if the model yields no sentence after
            ``_MAX_GENERATION_ATTEMPTS`` attempts.
    """
    model = Possy(text)
    for _ in range(_MAX_GENERATION_ATTEMPTS):
        sentence = model.make_short_sentence(_MAX_SENTENCE_CHARS)
        # markovify signals "could not build a sentence" by returning None.
        if sentence is not None:
            return sentence.replace("@", "@​")
    raise RuntimeError(
        "could not generate a sentence of at most "
        f"{_MAX_SENTENCE_CHARS} characters from the given text"
    )


if __name__ == "__main__":
    twitools.tweet(markovifyText(getText()), section=setuptools.MARKOV)