2021-01-10 16:10:13 +01:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# To run this bot:
# $ python3 logbot.py
# The output folder of this bot currently is: /var/www/logs/digital-autonomy
import logging
import os
import random
import re
import ssl
import urllib
import urllib.parse
import urllib.request
from argparse import ArgumentParser
from datetime import datetime
from getpass import getpass

import requests
import slixmpp
from bs4 import BeautifulSoup
def check_handle(handle, used_handles):
    """Tell whether `handle` already occurs in `used_handles`.

    Args:
        handle: The candidate handle.
        used_handles: A container of handles that are already taken.

    Returns:
        True when `handle` is found in `used_handles`, False otherwise.
    """
    return handle in used_handles
def request_handle(used_handles_path, handles_path='handles.txt'):
    """Pick a random unused handle and record it as used.

    Candidate handles are read from `handles_path` (one per line). Handles
    listed in the file at `used_handles_path` (one per line) are excluded.
    The chosen handle is appended to that file on a line of its own.

    Args:
        used_handles_path: Path of the file listing handles already in use.
            It is created when it does not exist yet.
        handles_path: Path of the file holding all candidate handles.
            Defaults to 'handles.txt' in the current working directory.

    Returns:
        The chosen handle, without a trailing newline.

    Raises:
        RuntimeError: If every candidate handle is already in use.
    """
    with open(handles_path, 'r') as pool_file:
        pool = [line.strip() for line in pool_file if line.strip()]

    try:
        with open(used_handles_path, 'r') as used_file:
            used_text = used_file.read()
    except FileNotFoundError:
        used_text = ''

    # Compare stripped lines so the trailing newlines in the files cannot
    # make a used handle look free.
    taken = {line.strip() for line in used_text.splitlines()}
    available = [candidate for candidate in pool if candidate not in taken]
    if not available:
        raise RuntimeError(f'No unused handles left in { handles_path }')

    handle = random.choice(available)

    with open(used_handles_path, 'a') as used_file:
        # Earlier writers may have left the last line unterminated; close it
        # first so that every handle ends up on a line of its own.
        if used_text and not used_text.endswith('\n'):
            used_file.write('\n')
        used_file.write(handle + '\n')

    return handle
2021-01-11 23:08:38 +01:00
def rec(self, entry):
    """Save `entry` as a new text file in the bot's `entries` folder.

    A fresh handle is requested through `request_handle`, which also records
    it in `<output>/.handles.txt`. The entry is written to
    `<output>/entries/<handle>.txt`.

    Args:
        self: The bot instance; only `self.output` (the output folder) is read.
        entry: The text to store.

    Returns:
        The handle under which the entry was saved.
    """
    output = self.output
    used_handles_path = os.path.join(output, '.handles.txt')
    entries_dir = os.path.join(output, 'entries')

    # Make sure the folder and the handles file exist before using them.
    os.makedirs(entries_dir, exist_ok=True)
    with open(used_handles_path, 'a'):
        pass

    # request_handle() already appends the handle to the handles file, so it
    # is not written a second time here.
    handle = request_handle(used_handles_path)

    # save entry
    newfile_path = os.path.join(entries_dir, handle + '.txt')
    with open(newfile_path, 'w') as f:
        f.write(entry)

    print('Saved!')
    return handle
def delete(self, handle):
    """Remove the entry files of `handle` and free the handle again.

    Deletes `<output>/entries/<handle>` with the extensions .txt, .png and
    .jpg where present, then removes the handle's line from
    `<output>/.handles.txt`.

    Args:
        self: The bot instance; only `self.output` (the output folder) is read.
        handle: The handle of the post to delete.
    """
    used_handles_path = os.path.join(self.output, '.handles.txt')
    entries_dir = os.path.join(self.output, 'entries')

    for extension in ('.txt', '.png', '.jpg'):
        path = os.path.join(entries_dir, handle + extension)
        if os.path.isfile(path):
            # os.remove instead of a shell `rm`, so paths containing spaces
            # or shell metacharacters are handled safely.
            os.remove(path)
            print(f'Removed { path }.')

    # delete handle from .handles.txt file
    try:
        with open(used_handles_path, 'r') as h:
            lines = h.readlines()
    except FileNotFoundError:
        # Nothing has been recorded yet, so there is nothing to clean up.
        return

    # Match whole lines only, so a handle that happens to be part of a
    # longer line is left alone.
    remaining = [line for line in lines if line.strip() != handle]
    with open(used_handles_path, 'w') as h:
        h.writelines(remaining)
# def write_to_log(self, entry):
# output = self.output
# # print(f'Output: { output }')
# log = 'index.html'
# css = 'stylesheet.css'
# used_handles = '.handles.txt'
# log_path = os.path.join(output, log)
# css_path = os.path.join(output, css)
# used_handles_path = os.path.join(output, used_handles)
# # check if file exists, if not: write it!
# if not os.path.isfile(log_path):
# html_template = open('templates/index.html', 'r').read()
# css_template = open('templates/stylesheet.css', 'r').read()
# with open(log_path, 'w') as l:
# l.write(html_template)
# l.write(f'<h1>{ self.groupchat }</h1>')
# with open(css_path, 'w') as c:
# c.write(css_template)
# with open(used_handles_path, 'w') as h:
# h.write('-----')
# # add entry to log
# handle = request_handle(used_handles_path)
# print(f'Picked a handle: { handle }')
# now = datetime.now().strftime('%A %d %B (%Y)')
# print(f'Now is: { now }')
# post = f'''<div id="{ handle }" class="post">
# <small class="postid">{ handle }</small>
# { entry }
# <small class="date">Added on { now }</small>
# </div>'''
# print(f'Post: { post }')
# with open(log_path, 'a+') as l:
# l.write(post)
# print('added to the log!')
# with open(used_handles_path, 'a+') as h:
# h.write(handle)
# print('added to the .handles file!')
2021-01-10 16:10:13 +01:00
2021-01-10 17:10:38 +01:00
# *spark
# add annotations
# def find_in_soup(self, handle, annotation):
# print('--------ADD ANNOTATION ---------')
# print(f'handle: { handle }')
# log = 'index.html'
# log_path = os.path.join(self.output, log)
# html = open(log_path, 'r').read()
# soup = BeautifulSoup(html, 'html.parser')
# # print(soup.prettify())
# post = soup.find(id=handle)
# # print(f'posts: { posts }')
# # for post in posts:
# print(f'post: { post }')
# if post:
# # annotationcontainer = post.findChildren(id="annotationcontainer", recursive=True)[0]
# # print(f'annotationcontainer: { annotationcontainer }')
# # print(f'annotationcontainer.contents: { annotationcontainer.contents }')
# # annotationcontainer.contents.append(f'<span class="annotation">{ annotation }</span>')
# # print(f'annotationcontainer.contents: { annotationcontainer.contents }')
2021-01-10 16:10:13 +01:00
2021-01-10 17:10:38 +01:00
# # new_annotation = soup.new_annotation("a", href="http://www.example.com")
# new_annotation = soup.new_annotation("span")
# new_annotation.append(annotation)
# soup.find(id=handle).find(class_="annotationcontainer").append(new_annotation)
# print(f'new soup: { str(soup) } ')
2021-01-10 16:10:13 +01:00
2021-01-10 17:10:38 +01:00
# # write soup to file
# with open(log_path, 'w') as l:
# l.write(str(soup))
2021-01-10 16:10:13 +01:00
class MUCBot(slixmpp.ClientXMPP):
    """
    A simple Slixmpp bot that joins one groupchat and saves images
    and messages that carry an action word to a folder.

    Action words handled in `muc_message`: ``@bot`` (help text),
    ``__ADD__``, ``__DELETE__`` and ``__BOOK__``. Files announced through an
    OOB URL are downloaded and logged as well.
    """

    # What the __DELETE__ command recognises as a handle: five vowels in a row.
    HANDLE_PATTERN = re.compile(r"[aioeu]{5}")

    # Reply sent when somebody addresses the bot with @bot.
    HELP_TEXT = (
        "Hello! RECbot here. I'm a new version of logbot.\n"
        "\n"
        "You can log type of text messages, by including __ADD__ in your "
        "message. Or, you can send an image/sound(*spark)/video(*spark) file "
        "to this chat and it will be logged for you.\n"
        "\n"
        "Items in the log can also be deleted again, by using the unique "
        "HANDLE of each post. You can find these handles in the generated "
        "HTML page, they look like this: +//-*.\n"
        "\n"
        "Happy logging!\n"
        "\n"
        "PS. you can access these logs at https://vvvvvvaria.org/logs/."
    )

    def __init__(self, use, password, groupchat, nickname, output, mode):
        """Store the bot settings and register the XMPP event handlers.

        Args:
            use: XMPP address the bot logs in with.
            password: Password for that address.
            groupchat: XMPP address of the groupchat to join.
            nickname: Nickname of the bot inside the groupchat.
            output: Folder in which entries and downloads are stored.
            mode: Log mode name ('log', 'stream' or 'distribusi').
        """
        slixmpp.ClientXMPP.__init__(self, use, password)

        self.groupchat = groupchat
        self.nick = nickname
        self.output = output
        self.mode = mode

        # The session_start event will be triggered when
        # the bot establishes its connection with the server
        # and the XML logs are ready for use. We want to
        # listen for this event so that we can initialize
        # our roster.
        self.add_event_handler("session_start", self.start)

        # The groupchat_message event is triggered whenever a message
        # stanza is received from any chat room. If you also
        # register a handler for the 'message' event, MUC messages
        # will be processed by both handlers.
        self.add_event_handler("groupchat_message", self.muc_message)

    def start(self, event):
        """Handle session_start: fetch the roster, go online, join the room."""
        self.get_roster()
        self.send_presence()

        # https://xmpp.org/extensions/xep-0045.html
        self.plugin['xep_0045'].join_muc(self.groupchat,
                                         self.nick,
                                         # If a room password is needed, use:
                                         # password=the_room_password,
                                         wait=True)

    def muc_message(self, msg):
        """Handle one groupchat message and act on its action words.

        Args:
            msg: The received message stanza; 'mucnick', 'body' and the
                OOB 'url' are read from it.
        """
        # Always check that a message is not the bot itself, otherwise you
        # will create an infinite loop responding to your own messages.
        if msg['mucnick'] == self.nick:
            return

        body = msg['body']

        if '@bot' in body:
            # Send some info about this bot.
            self._reply(self.HELP_TEXT)
            return

        # Respond to incoming __ACTION_WORDS__!
        self._ensure_output_folders()

        # Check if an OOB URL is included in the stanza (which is how an image is sent)
        # (OOB object - https://xmpp.org/extensions/xep-0066.html#x-oob)
        url = msg['oob']['url']
        if len(url) > 0:
            self._log_image(url)

        # Include a new post in the log (only when '__ADD__' is used in the message)
        if '__ADD__' in body:
            rec(self, body)
            self._reply(f'Noted! And added to the log. Thanks { msg["mucnick"] }!')

        # Delete a post from the log
        if '__DELETE__' in body:
            self._delete_entry(body)

        # NOTE: an __ANNOTATE__ command (adding annotations to an existing
        # post) used to be sketched here; it is currently disabled.

        # Check if this is a book ...
        if '__BOOK__' in body:
            self._log_book(body, msg['mucnick'])

        self._announce_mode()

    def _reply(self, text):
        """Send `text` to the groupchat."""
        self.send_message(mto=self.groupchat, mbody=text, mtype='groupchat')

    def _ensure_output_folders(self):
        """Create the output folder, its entries folder and the handles file."""
        os.makedirs(os.path.join(self.output, 'entries'), exist_ok=True)
        # Append mode creates the file when missing without emptying an
        # existing one.
        with open(os.path.join(self.output, '.handles.txt'), 'a'):
            pass

    def _log_image(self, url):
        """Download the file behind `url` into the output folder and log it."""
        # The URL comes from a chat participant: only fetch web URLs, since
        # urlopen() would otherwise also read file: and ftp: locations.
        if not url.lower().startswith(('http://', 'https://')):
            logging.warning('Ignoring OOB URL with unsupported scheme: %s', url)
            return

        # Save the image to the output folder
        filename = os.path.basename(url)  # grep the filename in the url
        output_path = os.path.join(self.output, filename)
        with urllib.request.urlopen(url) as response:  # read the image data
            data = response.read()
        with open(output_path, 'wb') as image_file:  # write image to file
            image_file.write(data)

        # Add the image to the log
        rec(self, f'<div class="entry image"><img src="{ filename }"></div>')

        # Only confirm once the image is actually stored.
        self._reply("Super, our log is growing. Your image is added!")

    def _delete_entry(self, body):
        """Delete the post whose handle is mentioned in `body`."""
        found = self.HANDLE_PATTERN.findall(body)
        if not found:
            # Without this check a __DELETE__ message that names no handle
            # would raise an IndexError.
            self._reply('Sorry, I could not find a handle in your message.')
            return

        handle = found[0]
        delete(self, handle)
        self._reply(f'Noted! The following post is deleted from the log: { handle }')

    def _log_book(self, body, sender):
        """Look up the book named in `body` on WorldCat and log the result."""
        self._reply("Oh a book, that's cool! Thanks {}!".format(sender))

        # Strip the trigger words, so only the book itself is searched for.
        book = body.replace('@bot', '').replace('/book', '').replace('__BOOK__', '')
        book = re.sub(' +', ' ', book)  # remove double spaces
        book = book.strip().lower()  # trim both ends and lowercase

        page_content = None
        try:
            # Passing the query via `params` lets requests URL-encode it.
            page_response = requests.get('https://www.worldcat.org/search',
                                         params={'q': book, 'qt': 'results_page'},
                                         timeout=5)
            page_content = BeautifulSoup(page_response.content, "html.parser")
        except requests.RequestException:
            logging.exception('Book lookup failed')

        details = None
        if page_content is not None:
            try:
                details = [page_content.findAll("div", {"class": css_class})[0].text
                           for css_class in ("name", "author", "publisher")]
            except IndexError:
                # One of the expected elements is missing from the page.
                details = None

        if details is None:
            self._reply('Sorry, no book found!')
            return

        book_title, book_author, book_publisher = details
        summary = book_title + ' ' + book_author + ' ' + book_publisher

        # Add message to log
        message = '<b>BOOK</b>: ' + summary
        rec(self, f'<div class="entry book">{ message }</div>')

        self._reply('Hope this was the book you were looking for: ' + summary)

    def _announce_mode(self):
        """Print which generator script belongs to the configured mode."""
        # Generate HTML logfiles
        # By default: log
        if self.mode:
            mode = str(self.mode.lower().strip())
            if mode in ("log", "stream", "distribusi"):
                print(f'> { mode }.py')
2021-01-10 16:10:13 +01:00
if __name__ == '__main__':
    # Set up the command line arguments.
    parser = ArgumentParser()

    # Output verbosity options; both write to the same `loglevel` value.
    for short_flag, long_flag, description, level in (
        ("-q", "--quiet", "set logging to ERROR", logging.ERROR),
        ("-d", "--debug", "set logging to DEBUG", logging.DEBUG),
    ):
        parser.add_argument(short_flag, long_flag, help=description,
                            action="store_const", dest="loglevel",
                            const=level, default=logging.INFO)

    # Connection options.
    for short_flag, long_flag, target, description in (
        ("-u", "--use", "use", "XMPP address to use"),
        ("-p", "--password", "password", "password to use"),
        ("-g", "--groupchat", "groupchat", "groupchat to join"),
        ("-n", "--nick", "nickname", "nickname for the bot"),
    ):
        parser.add_argument(short_flag, long_flag, dest=target, help=description)

    # Storage and log mode options.
    parser.add_argument("-o", "--output", dest="output",
                        help="output folder, this is where the files are stored",
                        type=str)
    parser.add_argument("-m", "--mode", dest="mode",
                        help="logmode, options include: log, stream, distribusi",
                        type=str, default='log')

    args = parser.parse_args()

    # Set up logging.
    logging.basicConfig(level=args.loglevel,
                        format='%(levelname)-8s %(message)s')

    # Ask interactively for every setting that was not given on the command
    # line; the password is read without echoing it.
    for target, ask, question in (
        ("use", input, "Use this XMPP address for the bot: "),
        ("password", getpass, "Password: "),
        ("groupchat", input, "Groupchat XMPP address: "),
        ("nickname", input, "Nickname for the bot: "),
        ("output", input, "Output folder path of the log: "),
    ):
        if getattr(args, target) is None:
            setattr(args, target, ask(question))

    # Set up the MUCBot and register plugins. Note that while plugins may
    # have interdependencies, the order in which you register them does
    # not matter.
    xmpp = MUCBot(args.use, args.password, args.groupchat,
                  args.nickname, args.output, args.mode)
    for plugin_name in (
        'xep_0030',  # Service Discovery
        'xep_0045',  # Multi-User Chat
        'xep_0199',  # XMPP Ping
        'xep_0066',  # Process URI's (files, images)
    ):
        xmpp.register_plugin(plugin_name)

    # Connect to the XMPP server and start processing XMPP stanzas.
    xmpp.connect()
    xmpp.process()