twitools/markov.py

#!/usr/bin/env python3

import dbtools, setuptools, twitools, filters.markov
import argparse, datetime, html, markovify, nltk, operator, os, random, re, string, sys, time

class Possy(markovify.NewlineText):
 """Newline-delimited markovify model whose tokens carry part-of-speech tags.

 Each word is stored as ``word::TAG`` so the tag becomes part of the
 token the chain is built from; the tags are removed again when a
 sentence is assembled.
 """

 def word_split(self, sentence):
  """Split *sentence* into a list of ``word::TAG`` tokens.

  The sentence is split with ``self.word_split_pattern`` (not defined in
  this class; presumably inherited from markovify) and the resulting
  words are tagged with ``nltk.pos_tag``.
  """
  tokens = re.split(self.word_split_pattern, sentence)
  tagged_pairs = nltk.pos_tag(tokens)
  return ["::".join(pair) for pair in tagged_pairs]

 def word_join(self, words):
  """Join ``word::TAG`` tokens back into a plain, space-separated sentence."""
  # Keep only the text before the first "::" of every token.
  bare_words = [token.partition("::")[0] for token in words]
  return " ".join(bare_words)

def sanitizeText(text):
 """Strip reply handles and retweet prefixes from a tweet's text.

 The rules are applied recursively until none of them matches:

 * Leading handle: if "@" is the first or second character and the second
   word does not start with an ASCII lowercase letter, the first word is
   dropped. A lowercase second word is taken to mean the handle is part
   of the sentence, so it is kept.
 * Trailing handle: if the last word starts with "@" and the text ends
   with a letter, digit or underscore, the last word is dropped.
 * Retweet: if the text starts with "RT @", everything up to and
   including the first ":" is dropped.

 Whenever a word is dropped the remaining words are re-joined with single
 spaces. Text that is too short to inspect (empty, a single character, a
 lone handle, whitespace only) yields an empty string.

 Args:
  text: The tweet text as a string.

 Returns:
  The cleaned text, or "" when nothing usable remains.
 """
 words = text.split()
 handle_chars = string.digits + string.ascii_letters + "_"
 try:
  if text[0] == "@" or text[1] == "@":
   if words[1][0] not in string.ascii_lowercase:
    return sanitizeText(" ".join(words[1:]))
  if words[-1][0] == "@" and text[-1] in handle_chars:
   return sanitizeText(" ".join(words[:-1]))
  if text[:4] == "RT @":
   return sanitizeText(text.partition(":")[2])
 except IndexError:
  # Only running out of characters/words is expected here; anything else
  # (including KeyboardInterrupt) is a real problem and must propagate.
  return ""
 return text

def getText(db = None):
 """Build the newline-separated training corpus from the stored tweets.

 Every row returned by ``SELECT text FROM tweets;`` is passed through
 ``sanitizeText``; lines that end up blank are discarded and HTML
 entities in the result are unescaped.

 Args:
  db: Object providing ``executeQuery(sql)`` that yields rows whose first
   column is the tweet text. When omitted, a ``dbtools.dbHelper()`` is
   created at call time (not at import time, so importing this module
   no longer constructs a helper as a side effect).

 Returns:
  The corpus as a single string, one sanitized tweet line per line.
 """
 if db is None:
  db = dbtools.dbHelper()
 rows = db.executeQuery('SELECT text FROM tweets;')
 # Join once instead of growing a string with += for every row.
 corpus = "".join(sanitizeText(row[0]) + "\n" for row in rows)
 # splitlines(True) keeps the line endings, so the surviving lines can be
 # concatenated back unchanged after the blank ones are filtered out.
 kept_lines = [line for line in corpus.strip().splitlines(True) if line.strip()]
 return html.unescape("".join(kept_lines))

def markovifyText(text):
 """Generate one short sentence from *text* using the ``Possy`` model.

 Args:
  text: Newline-separated corpus, one source sentence per line.

 Returns:
  A generated sentence of at most 130 characters, or "" when markovify
  could not produce one (``make_short_sentence`` returns None in that
  case, which previously crashed on ``.replace``).
 """
 # NOTE(review): 130 presumably leaves headroom under the tweet length
 # limit -- confirm before changing.
 sentence = Possy(text).make_short_sentence(130)
 if sentence is None:
  return ""
 # NOTE(review): as written this replaces "@" with an identical "@", i.e. a
 # no-op. Presumably the replacement was meant to defuse mentions (e.g. via
 # an invisible character that got lost) -- confirm against the repository.
 return sentence.replace("@", "@")

if __name__ == "__main__":
 # Generate a candidate sentence from the stored tweets.
 text = markovifyText(getText())
 # Let the project's filter module vet (and possibly rewrite) the candidate.
 outtext = filters.markov.textFilter(text)
 if outtext:
  # Look up the post-handler before tweeting, then pass it the result of
  # the tweet call together with the unfiltered sentence.
  post_handler = filters.markov.tweetFilter
  posted = twitools.tweet(outtext, section = setuptools.MARKOV)
  post_handler(posted, text)
-												Markov bot

											
										
										
											2017-02-15 18:09:48 +00:00
+								#!/usr/bin/env python3
-												Create file for Markov bot filters#

											
										
										
											2017-03-08 01:43:00 +00:00
+								import dbtools, setuptools, twitools, filters.markov
-												Allow Markov bot to tweet exactly at 0/15/30/45

											
										
										
											2017-02-21 21:22:44 +00:00
+								import argparse, datetime, html, markovify, nltk, operator, os, random, re, string, sys, time
-												Markov bot

											
										
										
											2017-02-15 18:09:48 +00:00
-												Make Markov bot use NLTK.

											
										
										
											2017-02-15 21:19:20 +00:00
+								class Possy(markovify.NewlineText):
 								 def word_split(self, sentence):
 								  words = re.split(self.word_split_pattern, sentence)
 								  words = [ "::".join(tag) for tag in nltk.pos_tag(words) ]
 								  return words
 								 def word_join(self, words):
 								  sentence = " ".join(word.split("::")[0] for word in words)
 								  return sentence
-												Markov bot

											
										
										
											2017-02-15 18:09:48 +00:00
-												Use replies in Markov bot, removing starting/ending handles

											
										
										
											2017-02-18 21:14:35 +00:00
+								def sanitizeText(text):
-												Optimize text sanitation

											
										
										
											2017-02-21 15:10:39 +00:00
+								 split = text.split()
-												Use replies in Markov bot, removing starting/ending handles

											
										
										
											2017-02-18 21:14:35 +00:00
+								 try:
-												Trying something...

											
										
										
											2017-02-23 15:16:58 +00:00
+								  if text[0] == "@" or text[1] == "@":
-												Optimize text sanitation

											
										
										
											2017-02-21 15:10:39 +00:00
+								   if split[1][0] not in string.ascii_lowercase:
-												Shorter, possibly more efficient but who cares anyway

											
										
										
											2017-02-23 15:10:14 +00:00
+								    return sanitizeText(" ".join(split[1:]))
-												Eest fix

											
										
										
											2017-02-27 13:45:51 +00:00
+								  if split[-1][0] == "@" and text[-1] in (string.digits + string.ascii_letters + "_"):
-												Optimize text sanitation

											
										
										
											2017-02-21 15:10:39 +00:00
+								   return sanitizeText(" ".join(split[:-1]))
 								  if text[:4] == "RT @":
 								   return sanitizeText(text.partition(":")[2])
-												Use replies in Markov bot, removing starting/ending handles

											
										
										
											2017-02-18 21:14:35 +00:00
+								 except:
 								  return ""
 								 return text
-												Markov bot

											
										
										
											2017-02-15 18:09:48 +00:00
+								def getText(db = dbtools.dbHelper()):
-												Fix markov bot

											
										
										
											2017-02-15 18:16:19 +00:00
+								 text = ""
-												Optimize text sanitation

											
										
										
											2017-02-21 15:10:39 +00:00
+								 for string in db.executeQuery('SELECT text FROM tweets;'):
-												Use replies in Markov bot, removing starting/ending handles

											
										
										
											2017-02-18 21:14:35 +00:00
+								  text += sanitizeText(string[0]) + "\n"
-												Make Markov bot unescape strings

											
										
										
											2017-02-16 11:21:25 +00:00
+								 return html.unescape("".join([s for s in text.strip().splitlines(True) if s.strip()]))
-												Markov bot

											
										
										
											2017-02-15 18:09:48 +00:00
-												Fix markov bot

											
										
										
											2017-02-15 18:16:19 +00:00
+								def markovifyText(text):
-												Make Markov bot use NLTK.

											
										
										
											2017-02-15 21:19:20 +00:00
+								 return Possy(text).make_short_sentence(130).replace("@", "@")
-												Markov bot

											
										
										
											2017-02-15 18:09:48 +00:00
 								if __name__ == "__main__":
-												Fix that.

											
										
										
											2017-02-21 21:26:09 +00:00
+								 text = markovifyText(getText())
-												Add post-handler for sent Markov tweets

											
										
										
											2017-03-08 23:48:09 +00:00
+								 outtext = filters.markov.textFilter(text)
 								 if outtext:
 								  filters.markov.tweetFilter(twitools.tweet(outtext, section = setuptools.MARKOV), text)