twitools/filler.py

143 lines
4.9 KiB
Python
Raw Normal View History

2015-04-13 20:58:32 +00:00
#!/usr/bin/env python3
2015-03-09 17:32:24 +00:00
2017-02-26 23:39:34 +00:00
import argparse, dbtools, dbtools.fillerfilter, requests, setuptools, time, twitools
def downloadMedia(url, tid, mid):
    """Download the file at *url* into the ``media/`` directory.

    The target path is ``media/<tid>_<mid>.<ext>``, where ``<ext>`` is
    whatever follows the last dot in *url*.

    Args:
        url: Address of the media file to fetch.
        tid: Identifier of the owning tweet/message; formatted via ``str``.
        mid: Index of the media item within its owner; formatted via ``int``.
    """
    remote = requests.get(url, stream=True)
    try:
        filename = "media/%s_%i.%s" % (str(tid), int(mid), url.split(".")[-1])
        # Leaving the ``with`` block flushes and closes the file, so no
        # per-chunk flush is needed.
        with open(filename, 'wb') as outfile:
            for chunk in remote.iter_content(chunk_size=1024):
                if chunk:  # skip empty chunks
                    outfile.write(chunk)
    finally:
        # A streamed response keeps its connection checked out until it is
        # fully consumed or closed, so always release it explicitly.
        remote.close()
2015-03-09 17:32:24 +00:00
def getTweets(db=None, user=None, two=None):
    """Store tweets newer than the latest saved one and download their media.

    Searches for ``"from:" + user`` starting at ``db.getLatestTweet() + 1``,
    inserts every status accepted by ``dbtools.fillerfilter.tweetFilter``
    into the ``tweets`` table and downloads the media attached to it.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when omitted.
        user: Screen name to search for; ``twitools.twObject().whoami()``
            when omitted.
        two: Twitter helper used for the search; a new
            ``twitools.twObject()`` when omitted.

    Returns:
        A ``(count, last, savepoint)`` tuple: the number of statuses
        iterated, the id of the last status seen (``savepoint`` if there was
        none) and the id the search started from.
    """
    # Defaults are resolved per call rather than in the signature, so that
    # merely importing this module neither opens a database nor talks to
    # Twitter.
    if db is None:
        db = dbtools.dbHelper()
    if user is None:
        user = twitools.twObject().whoami()
    if two is None:
        two = twitools.twObject()

    query = "from:" + user
    savepoint = db.getLatestTweet() + 1
    last = savepoint

    timeline = two.search(query, savepoint)

    tw_counter = 0

    for status in timeline:
        timestamp = status.created_at.strftime('%Y-%m-%d %H:%M:%S') + " +0000"
        text = setuptools.unescapeText(status.text)

        if dbtools.fillerfilter.tweetFilter(status):
            try:
                # NOTE(review): the statement is assembled by string
                # concatenation; whether `text` is safe to embed depends on
                # what setuptools.unescapeText does - confirm, or switch to a
                # parameterized query if db.executeQuery supports one.
                db.executeQuery("INSERT INTO tweets(tweet_id,timestamp,text) VALUES(" + str(status.id) + ",'" + timestamp + "','" + text + "')")
            except Exception:
                # Best effort: report the failure and carry on, but let
                # KeyboardInterrupt/SystemExit propagate.
                print("Failed to insert %s into database." % str(status.id))

            if 'media' in status.entities:
                for mid, m in enumerate(status.entities['media']):
                    downloadMedia(m['media_url'], status.id, mid)

        # NOTE(review): counted for every status, including filtered-out
        # ones and failed inserts - confirm this is intended.
        last = status.id
        tw_counter = tw_counter + 1

    db.commit()

    return tw_counter, last, savepoint
2015-03-09 17:32:24 +00:00
def getMessages(db=None, two=None):
    """Store direct messages newer than the latest saved one.

    Fetches received and sent direct messages with ids from
    ``db.getLatestMessage() + 1`` onwards, inserts those accepted by
    ``dbtools.fillerfilter.messageFilter`` into the ``messages`` table and
    downloads the media attached to them.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when omitted.
        two: Twitter helper whose ``api`` is queried; a new
            ``twitools.twObject()`` when omitted.

    Returns:
        A ``(count, savepoint, latest)`` tuple: the number of messages
        inserted, the id the fetch started from and the value of
        ``db.getLatestMessage()`` after the run.
    """
    # Defaults are resolved per call rather than in the signature, so that
    # merely importing this module neither opens a database nor talks to
    # Twitter.
    if db is None:
        db = dbtools.dbHelper()
    if two is None:
        two = twitools.twObject()

    mcount = 0
    savepoint = db.getLatestMessage() + 1
    new_messages = two.api.direct_messages(since_id=savepoint, count=200, full_text=True, include_entities=True)
    new_out_messages = two.api.sent_direct_messages(since_id=savepoint, count=200, full_text=True, include_entities=True)

    for m in (new_messages + new_out_messages):
        if dbtools.fillerfilter.messageFilter(m, True):
            try:
                # NOTE(review): values are interpolated straight into the
                # statement; whether the text is safe to embed depends on
                # what setuptools.unescapeText does - confirm, or switch to a
                # parameterized query if db.executeQuery supports one.
                db.executeQuery("INSERT INTO messages VALUES(%s, '%s', %s, %s, '%s')" % (m.id, setuptools.unescapeText(m.text), m.sender_id, m.recipient_id, m.created_at))
            except Exception:
                # Still best effort, but no longer silent.
                print("Failed to insert message %s into database." % str(m.id))
            else:
                mcount += 1

            if 'media' in m.entities:
                for mid, med in enumerate(m.entities['media']):
                    downloadMedia(med['media_url'], "m%i" % int(m.id), mid)

    db.commit()

    # The third element used to be the bound method itself; call it so the
    # caller receives the latest stored message id.
    return mcount, savepoint or 0, db.getLatestMessage()
def getFollowers(db=None, two=None, firstrun=False):
    """Synchronise the ``followers`` table with the current follower list.

    New followers accepted by ``dbtools.fillerfilter.followerFilter`` are
    inserted with the current time and ``until`` set to 0; followers that
    disappeared get their open row (``until`` = 0) closed with the current
    time. Every change is committed immediately.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when omitted.
        two: Unused; kept so existing callers keep working.
        firstrun: When true, an empty stored or fetched list is accepted
            instead of being treated as an error.

    Returns:
        A ``(gained, lost)`` tuple of counts; ``(0, 0)`` when either list is
        empty and *firstrun* is false.
    """
    # Resolved per call rather than in the signature, so that merely
    # importing this module does not open a database.
    if db is None:
        db = dbtools.dbHelper()

    current = list(db.getFollowers())
    new = list(twitools.getFollowerIDs())
    gained = 0
    lost = 0

    if (not current or not new) and not firstrun:
        print("Something went wrong.")
        return 0, 0

    # Sets give constant-time membership tests; the lists are still the ones
    # iterated, so processing order is unchanged.
    known = set(current)
    fetched = set(new)

    for follower in new:
        if follower not in known and dbtools.fillerfilter.followerFilter(follower, True):
            db.executeQuery("INSERT INTO followers VALUES('%s', %i, 0)" % (str(follower), int(time.time())))
            db.commit()
            gained += 1

    for follower in current:
        if follower not in fetched and dbtools.fillerfilter.followerFilter(follower, False):
            db.executeQuery("UPDATE followers SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), str(follower)))
            db.commit()
            lost += 1

    return gained, lost
def getFollowing(db=None, two=None, firstrun=False):
    """Synchronise the ``following`` table with the accounts currently followed.

    Newly followed accounts accepted by
    ``dbtools.fillerfilter.followingFilter`` are inserted with the current
    time and ``until`` set to 0; accounts no longer followed get their open
    row (``until`` = 0) closed with the current time. Every change is
    committed immediately.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when omitted.
        two: Unused; kept so existing callers keep working.
        firstrun: When true, an empty stored or fetched list is accepted
            instead of being treated as an error.

    Returns:
        A ``(gained, lost)`` tuple of counts; ``(0, 0)`` when either list is
        empty and *firstrun* is false.
    """
    # Resolved per call rather than in the signature, so that merely
    # importing this module does not open a database.
    if db is None:
        db = dbtools.dbHelper()

    current = list(db.getFollowing())
    new = list(twitools.getFollowingIDs())
    gained = 0
    lost = 0

    if (not current or not new) and not firstrun:
        print("Something went wrong.")
        return 0, 0

    # Sets give constant-time membership tests; the lists are still the ones
    # iterated, so processing order is unchanged.
    known = set(current)
    fetched = set(new)

    for following in new:
        if following not in known and dbtools.fillerfilter.followingFilter(following, True):
            db.executeQuery("INSERT INTO following VALUES('%s', %i, 0)" % (str(following), int(time.time())))
            db.commit()
            gained += 1

    for following in current:
        if following not in fetched and dbtools.fillerfilter.followingFilter(following, False):
            db.executeQuery("UPDATE following SET `until` = %i WHERE `id` = '%s' AND `until` = 0" % (int(time.time()), str(following)))
            db.commit()
            lost += 1

    db.commit()

    return gained, lost
def getNames(db=None, two=None):
    """Record the current names of all followed and following accounts.

    Looks up the names for the union of ``db.getFollowing()`` and
    ``db.getFollowers()``. For each user where ``db.matchNameID`` reports no
    match, the open ``names`` row (``until`` = 0) is closed and a new row
    with the current name is inserted.

    Args:
        db: Database helper; a new ``dbtools.dbHelper()`` when omitted.
        two: Unused; kept so existing callers keep working.
    """
    # Resolved per call rather than in the signature, so that merely
    # importing this module does not open a database.
    if db is None:
        db = dbtools.dbHelper()

    # De-duplicate: an account can be both a follower and followed.
    user_ids = list(set(list(db.getFollowing()) + list(db.getFollowers())))

    for user in twitools.getNamesByIDs(user_ids):
        if not db.matchNameID(user["name"], user["id"]):
            # One timestamp for both statements, so the old row ends exactly
            # when the new one starts.
            now = int(time.time())
            # NOTE(review): the name is interpolated straight into the SQL
            # text; a name containing a single quote would break the
            # statement unless it is escaped upstream - confirm, or switch
            # to a parameterized query if db.executeQuery supports one.
            db.executeQuery("UPDATE names SET `until` = %i WHERE `id` = '%s' AND `until` = 0;" % (now, str(user["id"])))
            db.executeQuery("INSERT INTO names VALUES('%s', '%s', %i, 0)" % (str(user["id"]), str(user["name"]), now))

    db.commit()
if __name__ == "__main__":
    # Script entry point: run every filler step once against a single
    # database helper and report what each step did.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f",
        "--first",
        action="store_true",
        help="first run: ignore empty databases",
    )
    args = parser.parse_args()

    db = dbtools.dbHelper()

    # Only the counts are reported; the id bookkeeping values are unused.
    tweet_count, _, _ = getTweets(db)
    print("Stored %i tweets." % tweet_count)

    message_count, _, _ = getMessages(db)
    print("Stored %i messages." % message_count)

    followers_gained, followers_lost = getFollowers(db, firstrun=args.first)
    print("Gained %i followers, lost %i." % (followers_gained, followers_lost))

    following_gained, following_lost = getFollowing(db, firstrun=args.first)
    print("Started following %i, stopped following %i." % (following_gained, following_lost))

    getNames(db)
    print("Stored handles of following/followers.")