crunk
4 years ago
commit
aa26c94971
2 changed files with 87 additions and 0 deletions
@ -0,0 +1,87 @@ |
|||
"""Palanggana is a small script to get plaintext from etherpump |
|||
based on the magic words used there."""
|||
|
|||
|
|||
from bs4 import BeautifulSoup |
|||
import requests as req |
|||
import argparse |
|||
|
|||
# Command-line interface: the magic word is mandatory, keeping it is opt-in.
parser = argparse.ArgumentParser(description="Get some text from the pump.")
parser.add_argument("-m", "--magic", required=True, help="Add your magic word here")
parser.add_argument("-k", "--keep", action="store_true", help="keep the magic words")

args = parser.parse_args()

# Module-level settings read by the functions of this script.
thepump = "https://etherpump.vvvvvvaria.org/"
keep = args.keep
# The magic word is upper-cased and wrapped in double underscores.
magicword = f"__{args.magic.upper()}__"
|||
|
|||
|
|||
def extractlinks(thewell):
    """Extract the links to the plain text documents from etherpump.

    Parses ``thewell`` as HTML and collects every ``<a>`` element whose
    ``href`` contains ``raw.txt``. Each href is resolved against the
    module-level ``thepump`` base URL.

    Returns:
        A list of URL strings, in the order the links appear in the markup.
    """
    # Imported here so this function does not depend on a file-level import.
    from urllib.parse import urljoin

    thewater = BeautifulSoup(thewell, "lxml")
    # urljoin handles relative, root-relative and absolute hrefs alike;
    # plain string concatenation would yield a malformed URL for the
    # latter two.
    return [
        urljoin(thepump, link["href"])
        for link in thewater.find_all("a", href=True)
        if "raw.txt" in link["href"]
    ]
|||
|
|||
|
|||
def extracttext(thegoods, timeout=30):
    """Extract the texts from the plain text links.

    Args:
        thegoods: iterable of URLs to download.
        timeout: seconds to wait for each request before giving up.

    Returns:
        A list with one entry per successfully downloaded link; each entry
        is the list of lines of that document.
    """
    import sys

    plaintexts = []
    for link in thegoods:
        try:
            # Without a timeout a stalled server would hang the script.
            res = req.get(link, timeout=timeout)
            # Do not mistake an HTTP error page for document text.
            res.raise_for_status()
        except req.RequestException as err:
            # One unreachable document should not lose all the others;
            # warn on stderr so stdout stays clean for the extracted text.
            print("Skipping {0}: {1}".format(link, err), file=sys.stderr)
            continue
        plaintexts.append(res.text.splitlines())
    return plaintexts
|||
|
|||
|
|||
def extractmagic(plaintexts):
    """Collect the lines that carry the magic word.

    Iterates over the given lines and keeps those containing the
    module-level ``magicword``. Unless the module-level ``keep`` flag is
    set, every occurrence of the magic word is removed from a kept line.

    Returns:
        A list of the matching lines, in their original order.
    """
    return [
        entry if keep else entry.replace(magicword, "")
        for entry in plaintexts
        if magicword in entry
    ]
|||
|
|||
|
|||
def findthewell(thepump, timeout=30):
    """Does the well have the magic you are looking for?

    Downloads the page at ``thepump`` and searches it for the ``div`` whose
    ``id`` equals the module-level ``magicword``.

    Args:
        thepump: URL of the page to search.
        timeout: seconds to wait for the request before giving up.

    Returns:
        The matching element, or ``None`` when no such ``div`` exists.

    Raises:
        An exception from the ``requests`` library when the request fails,
        times out, or the server answers with an HTTP error status.
    """
    # Without a timeout a stalled server would hang the script.
    resp = req.get(thepump, timeout=timeout)
    # Fail loudly on an HTTP error rather than parsing the error page and
    # wrongly reporting that the magic word is missing.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "lxml")
    return soup.find("div", id=magicword)
|||
|
|||
|
|||
# Locate the part of the index page that belongs to the requested magic word.
thewell = findthewell(thepump)

if thewell is None:
    # Nothing to extract. SystemExit with a message prints it on stderr and
    # exits with a non-zero status; quit() is meant for interactive sessions
    # only and would report success.
    raise SystemExit("Magic word {0} not found".format(magicword))

# extractlinks always returns a list, so no None check is needed afterwards.
thegoods = extractlinks(thewell.prettify())

# Gather the matching lines from every linked plain text document.
magiclines = []
for plaintext in extracttext(thegoods):
    magiclines.extend(extractmagic(plaintext))

# All is done, these are the texts you are looking for.
print("\n".join(magiclines))
Loading…
Reference in new issue