twitools/markov.py
2017-02-16 12:21:25 +01:00

27 lines
860 B
Python
Executable file
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
import dbtools, twitools
import argparse, html, markovify, nltk, operator, random, re, sys
class Possy(markovify.NewlineText):
    """Newline-delimited markovify model whose tokens carry POS tags.

    Tokens are stored as ``word::TAG`` strings, where the tag comes from
    ``nltk.pos_tag``; the tag is dropped again when words are joined back
    into a sentence.
    """

    def word_split(self, sentence):
        """Split *sentence* into tokens and return them as ``word::TAG``."""
        tokens = re.split(self.word_split_pattern, sentence)
        # nltk.pos_tag yields (word, tag) pairs; fuse each pair into one string.
        return ["::".join(pair) for pair in nltk.pos_tag(tokens)]

    def word_join(self, words):
        """Join tagged tokens with spaces, keeping only the word part."""
        # Everything before the first "::" is the original word.
        plain = [tagged.partition("::")[0] for tagged in words]
        return " ".join(plain)
def getText(db=None):
    """Return the stored tweet texts as one newline-separated string.

    Selects the ``text`` column of every row in ``tweets`` whose text does
    not start with "@" or "RT " (presumably replies and retweets), puts each
    text on its own line, drops blank lines and unescapes HTML entities.

    Args:
        db: Object exposing ``executeQuery(sql)`` that yields rows whose
            first element is the tweet text. When omitted, a fresh
            ``dbtools.dbHelper()`` is created for this call.

    Returns:
        The cleaned corpus; an empty string when the query yields no rows.
    """
    # Create the helper lazily: a default of ``dbtools.dbHelper()`` in the
    # signature would be evaluated once at definition time (i.e. on import)
    # and then shared by every call.
    if db is None:
        db = dbtools.dbHelper()

    rows = db.executeQuery('SELECT text FROM tweets WHERE text NOT LIKE "@%" AND text NOT LIKE "RT %";')
    text = "".join(row[0] + "\n" for row in rows)

    # Tweets may contain newlines themselves, so blank lines are filtered
    # after splitting the whole corpus rather than per row.
    kept = [line for line in text.strip().splitlines(True) if line.strip()]
    return html.unescape("".join(kept))
def markovifyText(text, max_chars=130):
    """Generate one short sentence from *text* using a ``Possy`` model.

    Args:
        text: Newline-separated corpus to build the model from.
        max_chars: Length limit passed to ``make_short_sentence``
            (defaults to 130, the previously hard-coded value).

    Returns:
        The generated sentence with a zero-width space inserted after
        every "@".

    Raises:
        RuntimeError: If the model could not produce a sentence, i.e.
            ``make_short_sentence`` returned ``None``.
    """
    sentence = Possy(text).make_short_sentence(max_chars)
    if sentence is None:
        # Previously this surfaced as an AttributeError on ``None.replace``.
        raise RuntimeError("could not generate a sentence from the given text")

    # NOTE(review): the original literal relied on an invisible character
    # after "@" (the repository viewer flags invisible Unicode in this file).
    # U+200B ZERO WIDTH SPACE is assumed here and spelled as an escape so it
    # survives editors and copy/paste -- confirm against the original bytes.
    # The inserted characters are added after the length limit was applied.
    return sentence.replace("@", "@\u200b")
if __name__ == "__main__":
    # Script entry point: build the corpus from the database, generate one
    # sentence from it and hand the result to twitools.tweet.
    corpus = getText()
    status = markovifyText(corpus)
    twitools.tweet(status)