Browse Source

first commit of the palanggana, get water(plaintext) from the (ether)pump

main
crunk 3 years ago
commit
aa26c94971
  1. 0
      README.md
  2. 87
      palanggana.py

0
README.md

87
palanggana.py

@ -0,0 +1,87 @@
"""Palanggana is a small script to get plaintext from etherpump,
based on the magic words used there."""
from bs4 import BeautifulSoup
import requests as req
import argparse
# Command-line interface: the magic word is mandatory, keeping it in the
# output is opt-in.
parser = argparse.ArgumentParser(description="Get some text from the pump.")
parser.add_argument("-m", "--magic", help="Add your magic word here", required=True)
parser.add_argument("-k", "--keep", help="keep the magic words", action="store_true")

# Module-level settings shared by the rest of the script.
thepump = "https://etherpump.vvvvvvaria.org/"  # page that gets fetched and scanned
args = parser.parse_args()
# The magic word is searched for in upper case, wrapped in double underscores.
magicword = f"__{args.magic.upper()}__"
# True when the magic word should stay inside the extracted lines.
keep = args.keep
def extractlinks(thewell):
    """Extract the links to the plain text documents from etherpump.

    Args:
        thewell: HTML markup to scan for anchors.

    Returns:
        A list of URLs, one for every ``<a href=...>`` whose target contains
        ``raw.txt``, each resolved against the module-level ``thepump`` URL.
    """
    # Imported locally so this function does not rely on a file-level import.
    from urllib.parse import urljoin

    thewater = BeautifulSoup(thewell, "lxml")
    # urljoin resolves the href against the pump URL the way a browser would,
    # so absolute and root-relative hrefs no longer end up glued behind the
    # base URL; simple relative hrefs give the same result as before.
    return [
        urljoin(thepump, link["href"])
        for link in thewater.find_all("a", href=True)
        if "raw.txt" in link["href"]
    ]
def extracttext(thegoods, timeout=30):
    """Download the plain text documents and split each one into lines.

    Args:
        thegoods: iterable of URLs to fetch.
        timeout: seconds to wait on the server for each request; passed
            straight through to ``requests.get``.

    Returns:
        A list with one entry per successfully fetched URL, each entry being
        the response text as a list of lines.
    """
    plaintexts = []
    for link in thegoods:
        # Without a timeout a single unresponsive server would stall the
        # whole run indefinitely.
        res = req.get(link, timeout=timeout)
        if not res.ok:
            # An error response carries an error page, not the document;
            # leave it out rather than returning it as document text.
            continue
        plaintexts.append(res.text.splitlines())
    return plaintexts
def extractmagic(plaintexts):
    """Pick out the lines that mention the magic word.

    Args:
        plaintexts: iterable of text lines to filter.

    Returns:
        A list of the lines containing the module-level ``magicword``. When
        the module-level ``keep`` flag is false, every occurrence of the magic
        word is removed from the returned lines; otherwise they are returned
        untouched.
    """
    return [
        candidate if keep else candidate.replace(magicword, "")
        for candidate in plaintexts
        if magicword in candidate
    ]
def findthewell(thepump, timeout=30):
    """Fetch the pump page and look up the section named after the magic word.

    Args:
        thepump: URL of the page to fetch.
        timeout: seconds to wait on the server; passed straight through to
            ``requests.get``.

    Returns:
        The ``<div>`` element whose ``id`` equals the module-level
        ``magicword``, or ``None`` when the page contains no such element.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Without a timeout an unresponsive server would hang the script forever.
    resp = req.get(thepump, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing the error page and
    # reporting a misleading "not found" result.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "lxml")
    return soup.find("div", id=magicword)
# Locate the section of the pump page that belongs to the magic word.
thewell = findthewell(thepump)
if thewell is None:
    # quit() is only injected by the site module and exits with status 0.
    # Raising SystemExit with a message always works, writes the message to
    # stderr (keeping stdout clean for the extracted text) and exits with
    # status 1 so callers can detect the failure.
    raise SystemExit("Magic word {0} not found".format(magicword))

thewell = thewell.prettify()
thegoods = extractlinks(thewell)

# extractlinks always returns a list, so no None check is needed; magiclines
# is bound unconditionally before it is printed.
magiclines = []
plaintexts = extracttext(thegoods)
for plaintext in plaintexts:
    magiclines.extend(extractmagic(plaintext))

# all is done, these are the texts you are looking for
print("\n".join(magiclines))
Loading…
Cancel
Save