crunk
4 years ago
commit
aa26c94971
2 changed files with 87 additions and 0 deletions
@ -0,0 +1,87 @@ |
|||||
"""Palanggana is a small script to get plaintext from etherpump
based on the magicwords used there"""
||||
|
|
||||
|
|
||||
|
import argparse
from urllib.parse import urljoin

import requests as req
from bs4 import BeautifulSoup
||||
|
|
||||
|
# Command-line interface: a mandatory magic word plus an optional switch
# that decides whether the magic word stays in the printed lines.
parser = argparse.ArgumentParser(description="Get some text from the pump.")
parser.add_argument("-m", "--magic", help="Add your magic word here", required=True)
parser.add_argument("-k", "--keep", action="store_true", help="keep the magic words")
||||
|
|
||||
|
# Shared settings read by the functions below.
args = parser.parse_args()
thepump = "https://etherpump.vvvvvvaria.org/"
keep = args.keep
# The magic word is upper-cased and wrapped in double underscores.
magicword = f"__{args.magic}__".upper()
||||
|
|
||||
|
|
||||
|
def extractlinks(thewell):
    """Extract the links to the plain text documents from etherpump.

    Args:
        thewell: HTML markup (a string) to scan for anchors.

    Returns:
        A list of URLs, one for every ``<a href=...>`` whose target
        contains ``raw.txt``, in document order.
    """
    thewater = BeautifulSoup(thewell, "lxml")
    # urljoin resolves each href against the pump's base URL, so relative,
    # root-relative ("/p/x.raw.txt") and already-absolute links all come out
    # as valid URLs instead of being blindly glued onto the base.
    return [
        urljoin(thepump, link["href"])
        for link in thewater.find_all("a", href=True)
        if "raw.txt" in link["href"]
    ]
||||
|
|
||||
|
|
||||
|
def extracttext(thegoods, timeout=30):
    """Download the plain text documents and split them into lines.

    Args:
        thegoods: iterable of URLs pointing at plain text documents.
        timeout: seconds to wait for each server response before giving up.

    Returns:
        A list with one entry per successfully fetched document; each entry
        is the list of lines of that document.
    """
    plaintexts = []
    for link in thegoods:
        # Without a timeout a single unresponsive server blocks the script
        # indefinitely.
        res = req.get(link, timeout=timeout)
        if not res.ok:
            # The body of an error response is not document text; skip it.
            continue
        plaintexts.append(res.text.splitlines())
    return plaintexts
||||
|
|
||||
|
|
||||
|
def extractmagic(plaintexts):
    """Return the lines that contain the magic word.

    ``plaintexts`` is iterated as a sequence of text lines. Unless the
    module-level ``keep`` flag is set, every occurrence of the magic word
    is removed from the selected lines.
    """
    return [
        text if keep else text.replace(magicword, "")
        for text in plaintexts
        if magicword in text
    ]
||||
|
|
||||
|
|
||||
|
def findthewell(thepump, timeout=30):
    """Look up the part of the pump page that belongs to the magic word.

    Args:
        thepump: URL of the page to fetch.
        timeout: seconds to wait for the server response before giving up.

    Returns:
        The ``<div>`` element whose ``id`` equals the magic word, or
        ``None`` when the page contains no such element.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    resp = req.get(thepump, timeout=timeout)
    # Fail loudly on HTTP errors; otherwise an error page would be
    # indistinguishable from "magic word not found".
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "lxml").find("div", id=magicword)
||||
|
|
||||
|
|
||||
|
# Find the section of the pump page that belongs to the magic word.
thewell = findthewell(thepump)

if thewell is None:
    print("Magic word {0} not found".format(magicword))
    # quit() is a helper the site module installs for interactive sessions
    # and it exits with status 0; raise SystemExit so the failure is
    # visible to the calling shell.
    raise SystemExit(1)

# extractlinks() parses markup from a string, so serialise the element first.
thewell = thewell.prettify()
thegoods = extractlinks(thewell)

# Collect the matching lines of every linked document, in link order.
magiclines = []
plaintexts = extracttext(thegoods)
for plaintext in plaintexts:
    magiclines.extend(extractmagic(plaintext))

# all is done, these are the texts you are looking for
print("\n".join(magiclines))
Loading…
Reference in new issue