"""Palanggana is a small script to get plaintext from etherpump based on the
magic words used there.

The script fetches the etherpump index page, looks for the ``<div>`` whose id
is the magic word, follows every ``raw.txt`` link found inside that div and
prints the lines of those pads (by default only the lines that contain the
magic word).
"""

import argparse
import sys

from bs4 import BeautifulSoup
import requests as req

parser = argparse.ArgumentParser(description="Get some text from the pump.")
parser.add_argument(
    "-m",
    "--magic",
    required=True,
    help="Add your magic word here",
)
parser.add_argument(
    "-k",
    "--keep",
    help="keep the magic words",
    action="store_true",
)
parser.add_argument(
    "-p",
    "--pad",
    help="specify a pad name, get only that pad",
)
parser.add_argument(
    "-a",
    "--all",
    help="get all content from pad not just the lines containing the magic",
    action="store_true",
)

# just a few variables that we will need
thepump = "https://etherpump.vvvvvvaria.org/"
# Seconds to wait for the server before giving up; requests has no default
# timeout, so without this an unresponsive host would block the script forever.
request_timeout = 30

args = parser.parse_args()
# Magic words are looked up upper-cased and wrapped in double underscores.
magicword = f"__{args.magic.upper()}__"
# Stays None when -p/--pad is not given, so a pad that is literally called
# "None" can still be requested.
padname = args.pad
keep = args.keep
wholepad = args.all


def _fetchlines(link):
    """Download ``link`` and return its body as a list of lines."""
    return req.get(link, timeout=request_timeout).text.splitlines()


def extractlinks(thewell):
    """Extract the links to the plain text documents from etherpump.

    Args:
        thewell: HTML markup (a string) to search for anchors.

    Returns:
        A list with one URL for every ``<a href=...>`` whose href contains
        ``raw.txt``; each href is prefixed with ``thepump``.
    """
    thewater = BeautifulSoup(thewell, "lxml")
    return [
        f"{thepump}{link['href']}"
        for link in thewater.find_all("a", href=True)
        if "raw.txt" in link["href"]
    ]


def extracttext(thegoods):
    """Extract the texts from the plain text links.

    When a pad name was given on the command line only the first link whose
    stripped name equals it (case-insensitively) is downloaded; otherwise
    every link is downloaded.

    Args:
        thegoods: iterable of URLs as produced by ``extractlinks``.

    Returns:
        A list of pads, each pad being a list of its lines. The list is empty
        when a pad name was given but no link matched it.
    """
    # Lower-case the requested name once instead of on every iteration.
    wanted = padname.lower() if padname else None
    plaintexts = []
    for link in thegoods:
        if wanted is None:
            plaintexts.append(_fetchlines(link))
            continue
        # Reduce the link to the bare pad name before comparing.
        strippedlink = (
            link.replace(thepump, "")
            .replace(".raw.txt", "")
            .replace("p/", "")
        )
        if wanted == strippedlink.lower():
            plaintexts.append(_fetchlines(link))
            # Only the first matching pad is wanted.
            break
    return plaintexts


def extractmagic(plaintexts):
    """Extract the lines that contain the magic word, keep the magic or not.

    Args:
        plaintexts: the lines (strings) of a single pad.

    Returns:
        The selected lines: every line when ``--all`` was given, otherwise
        only the lines containing the magic word. Unless ``--keep`` was given
        the magic word is removed from each returned line.
    """
    magiclines = []
    for line in plaintexts:
        if not wholepad and magicword not in line:
            continue
        if not keep:
            line = line.replace(magicword, "")
        magiclines.append(line)
    return magiclines


def findthewell(thepump):
    """Does the well have the magic you are looking for?

    Args:
        thepump: URL of the page to download and search.

    Returns:
        The ``<div>`` element whose id equals the magic word, or ``None``
        when the page has no such element.
    """
    resp = req.get(thepump, timeout=request_timeout)
    soup = BeautifulSoup(resp.text, "lxml")
    return soup.find("div", id=magicword)


thewell = findthewell(thepump)
if thewell is None:
    # sys.exit() instead of the interactive-only quit(): the message goes to
    # stderr and the exit status is non-zero, so callers can detect the miss.
    sys.exit("Magic word {0} not found".format(magicword))

thewell = thewell.prettify()
thegoods = extractlinks(thewell)

magiclines = []
plaintexts = extracttext(thegoods)
for plaintext in plaintexts:
    magiclines.extend(extractmagic(plaintext))

# all is done, these are the texts you are looking for
print("\n".join(magiclines))