twitools/markov.py

#!/usr/bin/env python3

import dbtools, setuptools, twitools
import argparse, html, markovify, nltk, operator, random, re, sys

class Possy(markovify.NewlineText):
 """markovify.NewlineText variant whose chain tokens carry a part-of-speech tag.

 Tokens are stored as ``word::TAG`` strings; the tag is produced by
 ``nltk.pos_tag`` and removed again when a sentence is joined.
 """

 def word_split(self, sentence):
  """Split *sentence* with ``self.word_split_pattern`` and tag every piece.

  Returns a list of strings, one per ``nltk.pos_tag`` result, with the
  elements of each result joined by ``::``.
  """
  pieces = re.split(self.word_split_pattern, sentence)
  tagged = []
  for pair in nltk.pos_tag(pieces):
   tagged.append("::".join(pair))
  return tagged

 def word_join(self, words):
  """Join tagged tokens back into a space-separated sentence.

  Only the text before the first ``::`` of each token is kept.
  """
  plain = [token.partition("::")[0] for token in words]
  return " ".join(plain)

def sanitizeText(text):
 """Strip leading and trailing ``@mention`` tokens from *text*.

 A leading mention is removed up to and including the first space
 character; a trailing mention is removed by re-joining the remaining
 whitespace-separated words with single spaces. Mentions in the middle of
 the text are left untouched.

 Returns the cleaned text, or ``""`` when *text* is not a string, is empty
 or whitespace-only, or consists only of mentions.
 """
 # Non-string input (e.g. None) yields an empty result instead of an error.
 if not isinstance(text, str):
  return ""
 # Iterate instead of recursing so that a long run of mentions cannot hit
 # the recursion limit and get silently turned into "".
 while True:
  if not text.strip():
   return ""
  if text[0] == "@":
   # Drop everything up to the first space; no space means nothing is left.
   text = text.partition(" ")[2]
   continue
  words = text.split()
  if words[-1][0] == "@":
   text = " ".join(words[:-1])
   continue
  return text

def getText(db = None):
 """Return the sanitized, newline-separated text of all non-retweet tweets.

 db: object providing ``executeQuery``; when omitted, a fresh
  ``dbtools.dbHelper()`` is created at call time (not at import time,
  so importing this module has no database side effect).

 Each row's first column is passed through ``sanitizeText``; lines that end
 up empty or whitespace-only are dropped, and HTML entities are unescaped
 in the final string.
 """
 if db is None:
  db = dbtools.dbHelper()
 rows = db.executeQuery('SELECT text FROM tweets WHERE text NOT LIKE "RT %";')
 # Build the corpus in one pass rather than by repeated string concatenation.
 text = "".join(sanitizeText(row[0]) + "\n" for row in rows)
 # keepends=True preserves the line terminators of the surviving lines.
 lines = [line for line in text.strip().splitlines(True) if line.strip()]
 return html.unescape("".join(lines))

def markovifyText(text):
 """Generate one sentence of at most 130 characters from *text*.

 text: newline-separated corpus used to build a ``Possy`` model.

 Returns the generated sentence.
 Raises RuntimeError when the model fails to produce a sentence
 (``make_short_sentence`` returned ``None``); previously this surfaced as
 an AttributeError on ``None.replace``.
 """
 sentence = Possy(text).make_short_sentence(130)
 if sentence is None:
  raise RuntimeError("could not generate a sentence of at most 130 characters")
 # NOTE(review): as it appears here both arguments are the same character,
 # which makes this call a no-op; presumably the replacement was meant to
 # be a look-alike that avoids notifying mentioned users -- TODO confirm.
 return sentence.replace("@", "@")

if __name__ == "__main__":
 # Script entry point: build the corpus, generate a sentence, post it.
 corpus = getText()
 generated = markovifyText(corpus)
 twitools.tweet(generated, section = setuptools.MARKOV)