#!/usr/bin/env python3
"""Generate a Markov-chain sentence from stored tweets and post it.

The tweet texts are read from the database, stripped of retweet prefixes
and leading/trailing mentions, fed into a part-of-speech aware markovify
model, and the generated sentence is passed through the project's markov
filters before being tweeted.
"""

# Project-local modules (relative import order kept as in the original).
import dbtools
import setuptools
import twitools
import filters.markov

import argparse
import datetime
import html
import markovify
import nltk
import operator
import os
import random
import re
import string
import sys
import time


class Possy(markovify.NewlineText):
    """Newline-delimited markovify model whose tokens carry a POS tag.

    Every token is stored as ``word::TAG`` so that the chain only links
    words that were used with the same part-of-speech tag.
    """

    def word_split(self, sentence):
        """Split *sentence* into tokens and tag each one as ``word::TAG``."""
        words = re.split(self.word_split_pattern, sentence)
        # nltk.pos_tag yields (word, tag) pairs; fuse each pair into one token.
        return ["::".join(tagged) for tagged in nltk.pos_tag(words)]

    def word_join(self, words):
        """Drop the ``::TAG`` suffixes and join the words with spaces."""
        return " ".join(word.split("::")[0] for word in words)


def sanitizeText(text: str) -> str:
    """Strip retweet prefixes and leading/trailing mentions from a tweet.

    The function calls itself on the shortened text until none of the
    rules applies any more:

    * a leading mention (``@`` as first or second character) is removed
      when the following word does not start with a lowercase ASCII letter;
    * a trailing mention is removed when the text ends in a letter, digit
      or underscore;
    * for ``RT @user: ...`` only the part after the first colon is kept.

    Returns:
        The cleaned text, or ``""`` when the text is too short for the
        checks above (e.g. empty, a single character, or a lone mention).
    """
    split = text.split()
    try:
        if text[0] == "@" or text[1] == "@":
            if split[1][0] not in string.ascii_lowercase:
                return sanitizeText(" ".join(split[1:]))
        if split[-1][0] == "@" and text[-1] in (string.digits + string.ascii_letters + "_"):
            return sanitizeText(" ".join(split[:-1]))
        if text[:4] == "RT @":
            return sanitizeText(text.partition(":")[2])
    except IndexError:
        # Only the index lookups above can fail for str input; such texts
        # are deliberately discarded. Anything else should propagate.
        return ""
    return text


def getText(db=None):
    """Return all sanitized tweet texts, one per line, HTML-unescaped.

    Args:
        db: Object providing ``executeQuery(sql)``. When omitted, a new
            ``dbtools.dbHelper()`` is created at call time (not at import
            time, so importing this module does not touch the database).

    Returns:
        A single string of the non-blank lines of the sanitized tweets.
    """
    if db is None:
        db = dbtools.dbHelper()
    rows = db.executeQuery('SELECT text FROM tweets;')
    text = "".join(sanitizeText(row[0]) + "\n" for row in rows)
    # Tweets that were sanitized to "" leave blank lines behind; drop them.
    kept = [line for line in text.strip().splitlines(True) if line.strip()]
    return html.unescape("".join(kept))


def markovifyText(text):
    """Generate one sentence of at most 130 characters from *text*.

    Returns:
        The generated sentence with every "@" defused, or ``None`` when
        the model could not produce a sentence.
    """
    sentence = Possy(text).make_short_sentence(130)
    if sentence is None:
        return None
    # NOTE: the replacement literal holds an invisible character after the
    # "@" (presumably a zero-width space, so generated mentions do not ping
    # real accounts) - keep it intact when editing this line.
    return sentence.replace("@", "@​")


def main():
    """Generate a sentence, filter it and tweet the result."""
    text = markovifyText(getText())
    if text is None:
        # Keep a non-zero exit status, but with a readable reason.
        sys.exit("markov: could not generate a sentence")
    outtext = filters.markov.textFilter(text)
    if outtext:
        filters.markov.tweetFilter(
            twitools.tweet(outtext, section=setuptools.MARKOV), text
        )


if __name__ == "__main__":
    main()