# twitools/markov.py

#!/usr/bin/env python3

import dbtools, setuptools, twitools
import argparse, html, markovify, nltk, operator, random, re, string, sys

class Possy(markovify.NewlineText):
 """markovify.NewlineText model whose chain states are part-of-speech-tagged words.

 Each word is stored as ``word::TAG`` (tag supplied by ``nltk.pos_tag``) and
 the tag is removed again when a generated sentence is assembled.
 """

 def word_split(self, sentence):
  """Split ``sentence`` and return its words as ``word::TAG`` strings."""
  tokens = re.split(self.word_split_pattern, sentence)
  tagged = nltk.pos_tag(tokens)
  return ["::".join(pair) for pair in tagged]

 def word_join(self, words):
  """Join tagged tokens into a plain space-separated sentence."""
  # Only the part before the first "::" of each token is kept.
  plain = [token.split("::")[0] for token in words]
  return " ".join(plain)

def sanitizeText(text):
 """Strip leading/trailing mentions and retweet prefixes from a tweet.

 The following rules are applied repeatedly until none matches:

 * If one of the first two characters is "@" and the second
   whitespace-separated word does not start with an ASCII lowercase
   letter, everything up to and including the first space is dropped.
 * If the last word starts with "@", that word is dropped (the remaining
   words are re-joined with single spaces).
 * If the text starts with "RT @", everything up to and including the
   first ":" is dropped.

 Args:
  text: the raw tweet text (a ``str``).

 Returns:
  The cleaned text, or "" when the text is too short to inspect (fewer
  than two characters, no words, or a leading mention with nothing after
  it).
 """
 while True:
  words = text.split()
  try:
   # Both text[0] and text[1] are read up front, so any text shorter
   # than two characters ends up in the IndexError handler below.
   if "@" in (text[0], text[1]) and words[1][0] not in string.ascii_lowercase:
    text = text.partition(" ")[2]
    continue
   if words[-1][0] == "@":
    text = " ".join(words[:-1])
    continue
   if text[:4] == "RT @":
    text = text.partition(":")[2]
    continue
  except IndexError:
   # Only indexing can fail here; anything else (e.g. KeyboardInterrupt)
   # must propagate instead of being silently turned into "".
   return ""
  return text

def getText(db = None):
 """Build the training corpus: one sanitized tweet per line.

 Args:
  db: object providing ``executeQuery(sql)`` whose rows carry the tweet
   text in their first column. When omitted, a ``dbtools.dbHelper()`` is
   created at call time (not at import time, and not shared between
   calls).

 Returns:
  A single string with one non-blank sanitized tweet per line and HTML
  entities unescaped.
 """
 if db is None:
  db = dbtools.dbHelper()
 rows = db.executeQuery('SELECT text FROM tweets;')
 lines = [sanitizeText(row[0]) + "\n" for row in rows]
 corpus = "".join(lines).strip()
 # Tweets that sanitize to nothing leave blank lines behind; drop them.
 kept = [line for line in corpus.splitlines(True) if line.strip()]
 return html.unescape("".join(kept))

def markovifyText(text):
 """Generate one sentence of at most 130 characters from ``text``.

 Args:
  text: newline-separated corpus used to build a ``Possy`` model.

 Returns:
  The generated sentence.

 Raises:
  RuntimeError: if the model could not produce a sentence (markovify
   reports this by returning None).
 """
 sentence = Possy(text).make_short_sentence(130)
 if sentence is None:
  # Without this check the failure surfaced as an opaque
  # "'NoneType' object has no attribute 'replace'".
  raise RuntimeError("could not generate a sentence of at most 130 characters")
 # NOTE(review): as written this replace maps "@" to itself; presumably the
 # replacement was meant to be a look-alike that avoids mentions - confirm.
 return sentence.replace("@", "@")

if __name__ == "__main__":
 # Script entry point: build the corpus, generate a sentence, and post it.
 # NOTE(review): `setuptools` must expose MARKOV, so it is presumably a
 # project-local module rather than the packaging library - confirm.
 corpus = getText()
 status = markovifyText(corpus)
 twitools.tweet(status, section = setuptools.MARKOV)