From 96269a71e25da341a488b55ff9c189eae168aacb Mon Sep 17 00:00:00 2001
From: Klaus-Uwe Mitterer
Date: Wed, 15 Feb 2017 22:19:20 +0100
Subject: [PATCH] Make Markov bot use NLTK.

---
 markov.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/markov.py b/markov.py
index 35cf8c6..5f8da99 100755
--- a/markov.py
+++ b/markov.py
@@ -1,16 +1,26 @@
 #!/usr/bin/env python3
 
 import dbtools, twitools
-import argparse, markovify, operator, random, re, sys
+import argparse, markovify, nltk, operator, random, re, sys
+
+class Possy(markovify.NewlineText):
+    def word_split(self, sentence):
+        words = re.split(self.word_split_pattern, sentence)
+        words = [ "::".join(tag) for tag in nltk.pos_tag(words) ]
+        return words
+
+    def word_join(self, words):
+        sentence = " ".join(word.split("::")[0] for word in words)
+        return sentence
 
 def getText(db = dbtools.dbHelper()):
     text = ""
     for string in db.executeQuery("SELECT text FROM tweets;"):
         text += string[0] + "\n"
-    return text
+    return "".join([s for s in text.strip().splitlines(True) if s.strip()])
 
 def markovifyText(text):
-    return markovify.NewlineText(text).make_short_sentence(130).replace("@", "@​")
+    return Possy(text).make_short_sentence(130).replace("@", "@​")
 
 def tweet(text, ref = 0, two = twitools.twObject()):
     return two.tweet(text, ref).id