#!/usr/bin/env python3

import dbtools, setuptools, twitools, filters.markov
import argparse, datetime, html, markovify, nltk, operator, os, random, re, string, sys, time

class Possy(markovify.NewlineText):
 """Newline-delimited markovify text model whose words carry a part-of-speech tag.

 Words are represented internally as ``word::TAG`` strings, where TAG is the
 tag returned by ``nltk.pos_tag``; the tag is dropped again when a sentence
 is joined back together.
 """

 def word_split(self, sentence):
  """Split *sentence* with the model's word pattern and return ``word::TAG`` strings."""
  tokens = re.split(self.word_split_pattern, sentence)
  tagged = []
  for pair in nltk.pos_tag(tokens):
   # pair is a (word, tag) tuple; fuse it into a single string.
   tagged.append("::".join(pair))
  return tagged

 def word_join(self, words):
  """Join ``word::TAG`` strings into a sentence, keeping only the text before the first ``::``."""
  plain = [tagged.split("::")[0] for tagged in words]
  return " ".join(plain)

def sanitizeText(text):
 """Strip retweet prefixes and addressing @-mentions from a tweet.

 The rules are applied recursively until none matches:

 * If "@" is one of the first two characters and the second
   whitespace-separated word does not start with an ASCII lowercase letter,
   the first word is dropped.
 * If the last word starts with "@" and the text ends in a letter, digit or
   underscore, the last word is dropped.
 * If the text starts with "RT @", everything up to and including the first
   ":" is dropped.

 Dropping words re-joins the remainder with single spaces.

 text: the raw tweet text (str).
 Returns the cleaned text, or "" when a rule runs out of words to inspect
 (e.g. the text is empty or consists only of a mention).
 """
 split = text.split()
 handle_chars = string.digits + string.ascii_letters + "_"
 try:
  # Slicing (rather than indexing text[1]) keeps one-character texts that
  # contain no mention from being discarded through an IndexError.
  if "@" in text[:2]:
   if split[1][0] not in string.ascii_lowercase:
    return sanitizeText(" ".join(split[1:]))
  if split[-1][0] == "@" and text[-1] in handle_chars:
   return sanitizeText(" ".join(split[:-1]))
  if text[:4] == "RT @":
   return sanitizeText(text.partition(":")[2])
 except IndexError:
  # Not enough words left to inspect: treat the text as empty.
  return ""
 return text

def getText(db=None):
 """Return the sanitized text of all stored tweets as one newline-separated string.

 db: object providing executeQuery(); when omitted, a dbtools.dbHelper() is
     created at call time (not at import time, as a default-argument
     expression would be).

 Every row's first column is passed through sanitizeText, lines that are
 empty or whitespace-only are removed, and HTML entities are unescaped in
 the final result.
 """
 if db is None:
  db = dbtools.dbHelper()
 rows = db.executeQuery('SELECT text FROM tweets;')
 text = "".join(sanitizeText(row[0]) + "\n" for row in rows)
 # splitlines(True) keeps the line endings so the surviving lines can be
 # concatenated back unchanged.
 kept = [line for line in text.strip().splitlines(True) if line.strip()]
 return html.unescape("".join(kept))

def markovifyText(text, max_chars=130):
 """Generate one sentence from *text* using the Possy model.

 text: newline-separated corpus used to build the model.
 max_chars: maximum length passed to make_short_sentence (default 130).

 Returns the generated sentence with every "@" followed by an invisible
 character (presumably a zero-width space so that mentions are not
 resolved - confirm), or "" when the model could not produce a sentence.
 """
 sentence = Possy(text).make_short_sentence(max_chars)
 if sentence is None:
  # make_short_sentence gives None when no acceptable sentence was found.
  return ""
 return sentence.replace("@", "@​")

if __name__ == "__main__":
 # Build one candidate sentence from the whole tweet corpus.
 text = markovifyText(getText())
 # The filtered text is what gets tweeted; a falsy result means nothing is posted.
 outtext = filters.markov.textFilter(text)
 if outtext:
  # tweetFilter receives the result of twitools.tweet plus the unfiltered sentence.
  record = filters.markov.tweetFilter
  record(twitools.tweet(outtext, section = setuptools.MARKOV), text)