bots/RECbot/RECbot.py

398 lines
13 KiB
Python
Raw Normal View History

2021-01-10 16:10:13 +01:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# To run this bot:
# $ python3 RECbot.py
# The output folder is set with the -o/--output option (for example: /var/www/logs/digital-autonomy)
import logging
import os
import random
import re
import ssl
import urllib
import urllib.parse
import urllib.request
from argparse import ArgumentParser
from datetime import datetime
from getpass import getpass

import requests
import slixmpp
from bs4 import BeautifulSoup
def check_handle(handle, used_handles):
    """Return True when *handle* is already present in *used_handles*.

    handle       -- the candidate handle to look up
    used_handles -- a container (list, set, ...) of handles already taken
    """
    return handle in used_handles
def request_handle(used_handles_path, handles_path='handles.txt'):
    """Pick a random unused handle, register it as used and return it.

    used_handles_path -- file listing the handles already taken, one per line;
                         a missing file is treated as "no handles used yet"
    handles_path      -- file listing all candidate handles, one per line

    Returns the chosen handle without surrounding whitespace.
    Raises RuntimeError when every candidate handle is already in use.
    """
    with open(handles_path, 'r') as f:
        candidates = [line.strip() for line in f if line.strip()]

    try:
        with open(used_handles_path, 'r') as f:
            registered = f.read()
    except FileNotFoundError:
        registered = ''

    # Compare stripped values on both sides, otherwise a trailing newline
    # makes every handle look unused.
    used = {line.strip() for line in registered.splitlines()}
    available = [handle for handle in candidates if handle not in used]
    if not available:
        # Picking at random until an unused handle shows up would never end here.
        raise RuntimeError(f'No unused handles left in { handles_path }')

    handle = random.choice(available)

    # Register the handle on a line of its own.
    with open(used_handles_path, 'a') as h:
        if registered and not registered.endswith('\n'):
            # The previous writer did not terminate its line.
            h.write('\n')
        h.write(handle + '\n')
    return handle
2021-01-11 23:08:38 +01:00
def rec(self, entry):
    """Record *entry* as a new text file in the bot's entries folder.

    self  -- the bot; only its `output` attribute (the output folder) is read
    entry -- the text to store

    The entry is written to <output>/entries/<handle>.txt, where the handle
    is obtained from request_handle().
    """
    output = self.output
    used_handles_path = os.path.join(output, '.handles.txt')
    entries_dir = os.path.join(output, 'entries')
    os.makedirs(entries_dir, exist_ok=True)

    # request_handle() appends the handle to the used-handles file itself,
    # so it is not written a second time here.
    # Strip it so a stray newline can never end up in the file name.
    handle = request_handle(used_handles_path).strip()

    # save entry
    newfile_path = os.path.join(entries_dir, handle + '.txt')
    with open(newfile_path, 'w', encoding='utf-8') as f:
        f.write(entry)

    print('Saved!')
def delete(self, handle):
    """Delete the entry files of *handle* and unregister the handle.

    self   -- the bot; only its `output` attribute (the output folder) is read
    handle -- the handle of the entry to delete

    Removes <output>/entries/<handle>.txt, .png and .jpg when they exist and
    drops the handle from <output>/.handles.txt.
    """
    entries_dir = os.path.join(self.output, 'entries')
    for extension in ('.txt', '.png', '.jpg'):
        path = os.path.join(entries_dir, handle + extension)
        if os.path.isfile(path):
            # os.remove avoids handing an unquoted path to a shell.
            os.remove(path)
            print(f'Removed { path }.')

    # delete handle from .handles.txt file
    used_handles_path = os.path.join(self.output, '.handles.txt')
    try:
        with open(used_handles_path, 'r') as h:
            lines = h.readlines()
    except FileNotFoundError:
        # Nothing registered, so nothing to unregister.
        return

    # Compare whole lines so only exact matches are dropped, including a
    # last line that has no trailing newline.
    remaining = [line for line in lines if line.strip() != handle]
    with open(used_handles_path, 'w') as h:
        h.writelines(remaining)
# def write_to_log(self, entry):
# output = self.output
# # print(f'Output: { output }')
# log = 'index.html'
# css = 'stylesheet.css'
# used_handles = '.handles.txt'
# log_path = os.path.join(output, log)
# css_path = os.path.join(output, css)
# used_handles_path = os.path.join(output, used_handles)
# # check if file exists, if not: write it!
# if not os.path.isfile(log_path):
# html_template = open('templates/index.html', 'r').read()
# css_template = open('templates/stylesheet.css', 'r').read()
# with open(log_path, 'w') as l:
# l.write(html_template)
# l.write(f'<h1>{ self.groupchat }</h1>')
# with open(css_path, 'w') as c:
# c.write(css_template)
# with open(used_handles_path, 'w') as h:
# h.write('-----')
# # add entry to log
# handle = request_handle(used_handles_path)
# print(f'Picked a handle: { handle }')
# now = datetime.now().strftime('%A %d %B (%Y)')
# print(f'Now is: { now }')
# post = f'''<div id="{ handle }" class="post">
# <small class="postid">{ handle }</small>
# { entry }
# <small class="date">Added on { now }</small>
# </div>'''
# print(f'Post: { post }')
# with open(log_path, 'a+') as l:
# l.write(post)
# print('added to the log!')
# with open(used_handles_path, 'a+') as h:
# h.write(handle)
# print('added to the .handles file!')
2021-01-10 16:10:13 +01:00
# *spark
# add annotations
# def find_in_soup(self, handle, annotation):
# print('--------ADD ANNOTATION ---------')
# print(f'handle: { handle }')
# log = 'index.html'
# log_path = os.path.join(self.output, log)
# html = open(log_path, 'r').read()
# soup = BeautifulSoup(html, 'html.parser')
# # print(soup.prettify())
# post = soup.find(id=handle)
# # print(f'posts: { posts }')
# # for post in posts:
# print(f'post: { post }')
# if post:
# # annotationcontainer = post.findChildren(id="annotationcontainer", recursive=True)[0]
# # print(f'annotationcontainer: { annotationcontainer }')
# # print(f'annotationcontainer.contents: { annotationcontainer.contents }')
# # annotationcontainer.contents.append(f'<span class="annotation">{ annotation }</span>')
# # print(f'annotationcontainer.contents: { annotationcontainer.contents }')
2021-01-10 16:10:13 +01:00
# # new_annotation = soup.new_annotation("a", href="http://www.example.com")
# new_annotation = soup.new_annotation("span")
# new_annotation.append(annotation)
# soup.find(id=handle).find(class_="annotationcontainer").append(new_annotation)
# print(f'new soup: { str(soup) } ')
2021-01-10 16:10:13 +01:00
# # write soup to file
# with open(log_path, 'w') as l:
# l.write(str(soup))
2021-01-10 16:10:13 +01:00
class MUCBot(slixmpp.ClientXMPP):
    """
    A Slixmpp group-chat bot that records chat content in an output folder.

    It reacts to messages in the joined room:
    - '@bot' makes the bot introduce itself,
    - a message carrying an OOB URL (a shared file) is downloaded and recorded,
    - '__ADD__' records the message text as an entry,
    - '__DELETE__' removes the entry whose handle is mentioned in the message,
    - '__BOOK__' looks the book up on WorldCat and records the first result.
    """

    def __init__(self, use, password, groupchat, nickname, output, mode):
        """Store the settings and register the event handlers.

        use       -- XMPP address the bot logs in with
        password  -- password for that address
        groupchat -- address of the room to join
        nickname  -- nickname of the bot in the room
        output    -- folder in which entries and downloads are stored
        mode      -- log mode: 'log', 'stream' or 'distribusi'
        """
        super().__init__(use, password)

        self.groupchat = groupchat
        self.nick = nickname
        self.output = output
        self.mode = mode

        # session_start fires once the connection with the server is
        # established; the room is joined from that handler.
        self.add_event_handler("session_start", self.start)

        # groupchat_message fires for every message stanza received from a
        # chat room. A handler registered for the 'message' event would
        # receive those messages as well.
        self.add_event_handler("groupchat_message", self.muc_message)

    def start(self, event):
        """Announce presence and join the configured room."""
        self.get_roster()
        self.send_presence()

        # https://xmpp.org/extensions/xep-0045.html
        self.plugin['xep_0045'].join_muc(self.groupchat,
                                         self.nick,
                                         # If a room password is needed, use:
                                         # password=the_room_password,
                                         wait=True)

    def _reply(self, text):
        """Send *text* to the room."""
        self.send_message(mto=self.groupchat, mbody=text, mtype='groupchat')

    def _ensure_output_folder(self):
        """Create the output folder, its entries folder and the handles file when missing."""
        os.makedirs(os.path.join(self.output, 'entries'), exist_ok=True)
        # Append mode creates the file when needed without truncating it.
        with open(os.path.join(self.output, '.handles.txt'), 'a'):
            pass

    def _search_book(self, body):
        """Look up the book named in *body* on WorldCat.

        Returns a (title, author, publisher) tuple taken from the first
        search result, or None when the request fails or the page does not
        contain the expected elements.
        """
        # Strip the trigger words so only the book query is left.
        book = body.replace('@bot', '').replace('__BOOK__', '').replace('/book', '')
        book = re.sub(' +', ' ', book)  # remove double spaces
        book = book.strip().lower()

        try:
            # `params` takes care of URL-encoding the query.
            page_response = requests.get('https://www.worldcat.org/search',
                                         params={'q': book, 'qt': 'results_page'},
                                         timeout=5)
        except requests.RequestException:
            return None

        page_content = BeautifulSoup(page_response.content, "html.parser")
        try:
            return tuple(
                page_content.findAll("div", {"class": css_class})[0].text
                for css_class in ("name", "author", "publisher")
            )
        except IndexError:
            return None

    def muc_message(self, msg):
        """Handle one message received from the room."""
        # Always check that a message is not the bot itself, otherwise you
        # will create an infinite loop responding to your own messages.
        if msg['mucnick'] == self.nick:
            return

        body = msg['body']

        if '@bot' in body:
            # Send some info about this bot.
            self._reply('''Hello! RECbot here. I\'m a new version of logbot.

You can log type of text messages, by including __ADD__ in your message. Or, you can send an image/sound(*spark)/video(*spark) file to this chat and it will be logged for you.

Items in the log can also be deleted again, by using the unique HANDLE of each post. You can find these handles in the generated HTML page, they look like this: +//-*.

Happy logging!

PS. you can access these logs at https://vvvvvvaria.org/logs/.''')
            return

        # Respond to incoming __ACTION_WORDS__!
        self._ensure_output_folder()

        # Check if an OOB URL is included in the stanza (which is how an image is sent)
        # (OOB object - https://xmpp.org/extensions/xep-0066.html#x-oob)
        url = msg['oob']['url']
        if url:
            self._reply("Super, our log is growing. Your image is added!")

            # Save the file to the output folder. The file name is taken
            # from the URL path only, so a query string does not end up in it.
            filename = os.path.basename(urllib.parse.urlparse(url).path)
            output_path = os.path.join(self.output, filename)
            with urllib.request.urlopen(url, timeout=10) as u, \
                    open(output_path, 'wb') as f:
                f.write(u.read())

            # Record the image as an entry
            img = f'<div class="entry image"><img src="{ filename }"></div>'
            rec(self, img)

        # Include a new post in the log (only when '__ADD__' is used in the message)
        if '__ADD__' in body:
            self._reply(f'Noted! And added to the log. Thanks { msg["mucnick"] }!')
            rec(self, body)

        # Delete a post from the log
        if '__DELETE__' in body:
            # A handle is a run of five vowels.
            match = re.search(r"[aioeu]{5}", body)
            if match is None:
                self._reply('Sorry, I could not find a handle in your message.')
            else:
                handle = match.group(0)
                self._reply(f'Noted! The following post is deleted from the log: { handle }')
                delete(self, handle)

        # Check if this is a book ...
        if '__BOOK__' in body:
            self._reply("Oh a book, that's cool! Thanks {}!".format(msg['mucnick']))

            found = self._search_book(body)
            if found:
                description = ' '.join(found)
                # Record the book as an entry
                message = '<b>BOOK</b>: ' + description
                rec(self, f'<div class="entry book">{ message }</div>')
                self._reply('Hope this was the book you were looking for: ' + description)
            else:
                self._reply('Sorry, no book found!')

        # Generate HTML logfiles
        # By default: log
        # NOTE(review): the nesting level of this step is reconstructed;
        # confirm it should run only for messages without '@bot'.
        if self.mode:
            mode = str(self.mode.lower().strip())
            scripts = {'log': 'log.py', 'stream': 'stream.py', 'distribusi': 'distribusi.py'}
            if mode in scripts:
                print(f'> { scripts[mode] }')
2021-01-10 16:10:13 +01:00
if __name__ == '__main__':
    # Command line interface.
    parser = ArgumentParser()

    # Verbosity: both flags store into `loglevel`, INFO when neither is given.
    parser.add_argument("-q", "--quiet", dest="loglevel", action="store_const",
                        const=logging.ERROR, default=logging.INFO,
                        help="set logging to ERROR")
    parser.add_argument("-d", "--debug", dest="loglevel", action="store_const",
                        const=logging.DEBUG, default=logging.INFO,
                        help="set logging to DEBUG")

    # Bot settings.
    parser.add_argument("-u", "--use", dest="use",
                        help="XMPP address to use")
    parser.add_argument("-p", "--password", dest="password",
                        help="password to use")
    parser.add_argument("-g", "--groupchat", dest="groupchat",
                        help="groupchat to join")
    parser.add_argument("-n", "--nick", dest="nickname",
                        help="nickname for the bot")
    parser.add_argument("-o", "--output", dest="output", type=str,
                        help="output folder, this is where the files are stored")
    parser.add_argument("-m", "--mode", dest="mode", type=str, default='log',
                        help="logmode, options include: log, stream, distribusi")

    args = parser.parse_args()

    # Logging.
    logging.basicConfig(level=args.loglevel,
                        format='%(levelname)-8s %(message)s')

    # Ask interactively for every setting that was not given on the command
    # line; the password is read without echoing it.
    questions = (
        ('use', input, "Use this XMPP address for the bot: "),
        ('password', getpass, "Password: "),
        ('groupchat', input, "Groupchat XMPP address: "),
        ('nickname', input, "Nickname for the bot: "),
        ('output', input, "Output folder path of the log: "),
    )
    for setting, ask, question in questions:
        if getattr(args, setting) is None:
            setattr(args, setting, ask(question))

    # Create the bot and register the plugins it relies on.
    xmpp = MUCBot(args.use, args.password, args.groupchat,
                  args.nickname, args.output, args.mode)
    for plugin in (
        'xep_0030',  # Service Discovery
        'xep_0045',  # Multi-User Chat
        'xep_0199',  # XMPP Ping
        'xep_0066',  # Process URI's (files, images)
    ):
        xmpp.register_plugin(plugin)

    # Connect to the XMPP server and start processing XMPP stanzas.
    xmpp.connect()
    xmpp.process()