diff --git a/markov.py b/markov.py
index 5aa5601..0eeb2ff 100755
--- a/markov.py
+++ b/markov.py
@@ -13,10 +13,22 @@ class Possy(markovify.NewlineText):
         sentence = " ".join(word.split("::")[0] for word in words)
         return sentence
 
+def sanitizeText(text):
+    """Strip leading and trailing @mentions from text; return "" when nothing usable is left."""
+    try:
+        if text[0] == "@":
+            return sanitizeText(text.partition(" ")[2])
+        if text.split()[-1][0] == "@":
+            return sanitizeText(" ".join(text.split()[:-1]))
+    except (IndexError, TypeError):
+        # IndexError: empty or whitespace-only text; TypeError: text is not subscriptable (e.g. None).
+        return ""
+    return text
+
 def getText(db = dbtools.dbHelper()):
     text = ""
-    for string in db.executeQuery('SELECT text FROM tweets WHERE text NOT LIKE "@%" AND text NOT LIKE "RT %";'):
-        text += string[0] + "\n"
+    for string in db.executeQuery('SELECT text FROM tweets WHERE text NOT LIKE "RT %";'):
+        text += sanitizeText(string[0]) + "\n"
     return html.unescape("".join([s for s in text.strip().splitlines(True) if s.strip()]))
 
 def markovifyText(text):