diff --git a/Makefile b/Makefile index b23e9ad..86f9e05 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,13 @@ +SOURCE_DIRS := bin/ etherpump/ + publish: @rm -rf dist @python setup.py bdist_wheel @twine upload dist/* + +format: + @black $(SOURCE_DIRS) + @isort -rc $(SOURCE_DIRS) + +lint: + @flake8 $(SOURCE_DIRS) diff --git a/README.md b/README.md index c3ba38f..145cc45 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,23 @@ Publishing You should have a [PyPi](https://pypi.org/) account and be added as an owner/maintainer on the [etherpump package](https://pypi.org/project/etherpump/). +Maintenance utilities +--------------------- + +Tools to help things stay tidy over time. + +```bash +$ pip install flake8 isort black +$ make format +$ make lint +``` + +Please see the following links for further reading: + +* http://flake8.pycqa.org +* https://isort.readthedocs.io +* https://black.readthedocs.io + License ======= diff --git a/bin/etherpump b/bin/etherpump index 9161e4f..31a2c8e 100755 --- a/bin/etherpump +++ b/bin/etherpump @@ -1,8 +1,9 @@ #!/usr/bin/env python3 -from etherpump import VERSION import sys +from etherpump import VERSION + usage = """Usage: etherpump CMD @@ -43,7 +44,9 @@ except IndexError: sys.exit(0) try: # http://stackoverflow.com/questions/301134/dynamic-module-import-in-python - cmdmod = __import__("etherpump.commands.%s" % cmd, fromlist=["etherdump.commands"]) + cmdmod = __import__( + "etherpump.commands.%s" % cmd, fromlist=["etherdump.commands"] + ) cmdmod.main(args) except ImportError as e: print("Error performing command '{0}'\n(python said: {1})\n".format(cmd, e)) diff --git a/etherpump/__init__.py b/etherpump/__init__.py index c75943e..992440f 100644 --- a/etherpump/__init__.py +++ b/etherpump/__init__.py @@ -1,4 +1,4 @@ import os DATAPATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data") -VERSION = '0.0.2' +VERSION = '0.0.2' diff --git a/etherpump/commands/appendmeta.py b/etherpump/commands/appendmeta.py index 4e3e35a..578ec18 100644 --- a/etherpump/commands/appendmeta.py +++ b/etherpump/commands/appendmeta.py @@ -1,8 +1,10 @@ #!/usr/bin/env python +import json +import os from argparse import ArgumentParser -import json, os + def main(args): p = ArgumentParser("") @@ -18,6 +20,6 @@ def main(args): ret.append(meta) if args.indent: - print (json.dumps(ret, indent=args.indent)) + print(json.dumps(ret, indent=args.indent)) else: - print (json.dumps(ret)) + print(json.dumps(ret)) diff --git a/etherpump/commands/common.py b/etherpump/commands/common.py index 0946198..83b2ca6 100644 --- a/etherpump/commands/common.py +++ b/etherpump/commands/common.py @@ -1,24 +1,35 @@ - -import re, os, json, sys +import json +import os +import re +import sys +from html.entities import name2codepoint from math import ceil, floor from time import sleep - -from urllib.parse import urlparse, urlunparse, urlencode, quote_plus, unquote_plus -from urllib.request import urlopen, URLError, HTTPError -from html.entities import name2codepoint +from urllib.parse import ( + quote_plus, + unquote_plus, + urlencode, + urlparse, + urlunparse, +) +from urllib.request import HTTPError, URLError, urlopen groupnamepat = re.compile(r"^g\.(\w+)\$") -def splitpadname (padid): + + +def splitpadname(padid): m = groupnamepat.match(padid) if m: - return(m.group(1), padid[m.end():]) + return (m.group(1), padid[m.end() :]) else: return ("", padid) -def padurl (padid, ): + +def padurl(padid,): return padid -def padpath (padid, pub_path="", group_path="", normalize=False): + +def padpath(padid, pub_path="", group_path="", normalize=False): g, p = splitpadname(padid) p = quote_plus(p) if normalize: @@ -32,7 +43,8 @@ def padpath (padid, pub_path="", group_path="", normalize=False): else: return os.path.join(pub_path, p) -def padpath2id (path): + +def padpath2id(path): if type(path) == str: path = path.encode("utf-8") dd, p = os.path.split(path) @@ -43,7 +55,8 @@ def padpath2id (path): else: return p.decode("utf-8") -def getjson (url, max_retry=3, retry_sleep_time=3): + +def getjson(url, max_retry=3, retry_sleep_time=3): ret = {} ret["_retries"] = 0 while ret["_retries"] <= max_retry: @@ -61,13 +74,14 @@ def getjson (url, max_retry=3, retry_sleep_time=3): except ValueError as e: url = "http://localhost" + url except HTTPError as e: - print ("HTTPError {0}".format(e), file=sys.stderr) + print("HTTPError {0}".format(e), file=sys.stderr) ret["_code"] = e.code - ret["_retries"]+=1 + ret["_retries"] += 1 if retry_sleep_time: sleep(retry_sleep_time) return ret + def loadpadinfo(p): with open(p) as f: info = json.load(f) @@ -75,17 +89,17 @@ def loadpadinfo(p): info['localapiurl'] = info.get('apiurl') return info -def progressbar (i, num, label="", file=sys.stderr): + +def progressbar(i, num, label="", file=sys.stderr): p = float(i) / num - percentage = int(floor(p*100)) - bars = int(ceil(p*20)) - bar = ("*"*bars) + ("-"*(20-bars)) - msg = "\r{0} {1}/{2} {3}... ".format(bar, (i+1), num, label) + percentage = int(floor(p * 100)) + bars = int(ceil(p * 20)) + bar = ("*" * bars) + ("-" * (20 - bars)) + msg = "\r{0} {1}/{2} {3}... ".format(bar, (i + 1), num, label) sys.stderr.write(msg) sys.stderr.flush() - # Python developer Fredrik Lundh (author of elementtree, among other things) has such a function on his website, which works with decimal, hex and named entities: ## # Removes HTML or XML character references and entities from a text string. @@ -110,5 +124,6 @@ def unescape(text): text = chr(name2codepoint[text[1:-1]]) except KeyError: pass - return text # leave as is + return text # leave as is + return re.sub("&#?\w+;", fixup, text) diff --git a/etherpump/commands/creatediffhtml.py b/etherpump/commands/creatediffhtml.py index e5dd8dc..766a15b 100644 --- a/etherpump/commands/creatediffhtml.py +++ b/etherpump/commands/creatediffhtml.py @@ -1,18 +1,29 @@ - -from argparse import ArgumentParser import json +from argparse import ArgumentParser +from urllib.error import HTTPError, URLError from urllib.parse import urlencode from urllib.request import urlopen -from urllib.error import HTTPError, URLError def main(args): - p = ArgumentParser("calls the createDiffHTML API function for the given padid") + p = ArgumentParser( + "calls the createDiffHTML API function for the given padid" + ) p.add_argument("padid", help="the padid") - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: .etherdump/settings.json", + ) p.add_argument("--showurl", default=False, action="store_true") - p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") - p.add_argument("--rev", type=int, default=None, help="revision, default: latest") + p.add_argument( + "--format", + default="text", + help="output format, can be: text, json; default: text", + ) + p.add_argument( + "--rev", type=int, default=None, help="revision, default: latest" + ) args = p.parse_args(args) with open(args.padinfo) as f: @@ -25,15 +36,15 @@ def main(args): data['startRev'] = "0" if args.rev != None: data['rev'] = args.rev - requesturl = apiurl+'createDiffHTML?'+urlencode(data) + requesturl = apiurl + 'createDiffHTML?' + urlencode(data) if args.showurl: - print (requesturl) + print(requesturl) else: try: results = json.load(urlopen(requesturl))['data'] if args.format == "json": - print (json.dumps(results)) + print(json.dumps(results)) else: - print (results['html'].encode("utf-8")) + print(results['html'].encode("utf-8")) except HTTPError as e: - pass \ No newline at end of file + pass diff --git a/etherpump/commands/deletepad.py b/etherpump/commands/deletepad.py index 8d1800e..aa77fbb 100644 --- a/etherpump/commands/deletepad.py +++ b/etherpump/commands/deletepad.py @@ -1,17 +1,24 @@ - -from argparse import ArgumentParser import json +from argparse import ArgumentParser +from urllib.error import HTTPError, URLError from urllib.parse import urlencode from urllib.request import urlopen -from urllib.error import HTTPError, URLError def main(args): p = ArgumentParser("calls the getText API function for the given padid") p.add_argument("padid", help="the padid") - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: .etherdump/settings.json", + ) p.add_argument("--showurl", default=False, action="store_true") - p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") + p.add_argument( + "--format", + default="text", + help="output format, can be: text, json; default: text", + ) args = p.parse_args(args) with open(args.padinfo) as f: @@ -20,14 +27,14 @@ def main(args): # apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info) data = {} data['apikey'] = info['apikey'] - data['padID'] = args.padid # is utf-8 encoded - requesturl = apiurl+'deletePad?'+urlencode(data) + data['padID'] = args.padid # is utf-8 encoded + requesturl = apiurl + 'deletePad?' + urlencode(data) if args.showurl: - print (requesturl) + print(requesturl) else: results = json.load(urlopen(requesturl)) if args.format == "json": - print (json.dumps(results)) + print(json.dumps(results)) else: if results['data']: - print (results['data']['text'].encode("utf-8")) + print(results['data']['text'].encode("utf-8")) diff --git a/etherpump/commands/dumpcsv.py b/etherpump/commands/dumpcsv.py index fc1817b..a62c85e 100644 --- a/etherpump/commands/dumpcsv.py +++ b/etherpump/commands/dumpcsv.py @@ -1,12 +1,13 @@ - +import json +import re +import sys from argparse import ArgumentParser -import sys, json, re +from csv import writer from datetime import datetime +from math import ceil, floor +from urllib.error import HTTPError, URLError from urllib.parse import urlencode from urllib.request import urlopen -from urllib.error import HTTPError, URLError -from csv import writer -from math import ceil, floor """ Dumps a CSV of all pads with columns @@ -23,16 +24,27 @@ groupnamepat = re.compile(r"^g\.(\w+)\$") out = writer(sys.stdout) -def jsonload (url): + +def jsonload(url): f = urlopen(url) data = f.read() f.close() return json.loads(data) -def main (args): + +def main(args): p = ArgumentParser("outputs a CSV of information all all pads") - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") - p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False") + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: .etherdump/settings.json", + ) + p.add_argument( + "--zerorevs", + default=False, + action="store_true", + help="include pads with zero revisions, default: False", + ) args = p.parse_args(args) with open(args.padinfo) as f: @@ -40,7 +52,7 @@ def main (args): apiurl = info.get("apiurl") data = {} data['apikey'] = info['apikey'] - requesturl = apiurl+'listAllPads?'+urlencode(data) + requesturl = apiurl + 'listAllPads?' + urlencode(data) padids = jsonload(requesturl)['data']['padIDs'] padids.sort() @@ -49,36 +61,50 @@ def main (args): count = 0 out.writerow(("padid", "groupid", "lastedited", "revisions", "author_ids")) for i, padid in enumerate(padids): - p = (float(i) / numpads) - percentage = int(floor(p*100)) - bars = int(ceil(p*20)) - bar = ("*"*bars) + ("-"*(20-bars)) - msg = "\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid) + p = float(i) / numpads + percentage = int(floor(p * 100)) + bars = int(ceil(p * 20)) + bar = ("*" * bars) + ("-" * (20 - bars)) + msg = "\r{0} {1}/{2} {3}... ".format(bar, (i + 1), numpads, padid) if len(msg) > maxmsglen: maxmsglen = len(msg) - sys.stderr.write("\r{0}".format(" "*maxmsglen)) + sys.stderr.write("\r{0}".format(" " * maxmsglen)) sys.stderr.write(msg.encode("utf-8")) sys.stderr.flush() m = groupnamepat.match(padid) if m: groupname = m.group(1) - padidnogroup = padid[m.end():] + padidnogroup = padid[m.end() :] else: groupname = "" padidnogroup = padid data['padID'] = padid.encode("utf-8") - revisions = jsonload(apiurl+'getRevisionsCount?'+urlencode(data))['data']['revisions'] + revisions = jsonload(apiurl + 'getRevisionsCount?' + urlencode(data))[ + 'data' + ]['revisions'] if (revisions == 0) and not args.zerorevs: continue - - lastedited_raw = jsonload(apiurl+'getLastEdited?'+urlencode(data))['data']['lastEdited'] - lastedited_iso = datetime.fromtimestamp(int(lastedited_raw)/1000).isoformat() - author_ids = jsonload(apiurl+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs'] + lastedited_raw = jsonload(apiurl + 'getLastEdited?' + urlencode(data))[ + 'data' + ]['lastEdited'] + lastedited_iso = datetime.fromtimestamp( + int(lastedited_raw) / 1000 + ).isoformat() + author_ids = jsonload(apiurl + 'listAuthorsOfPad?' + urlencode(data))[ + 'data' + ]['authorIDs'] author_ids = " ".join(author_ids).encode("utf-8") - out.writerow((padidnogroup.encode("utf-8"), groupname.encode("utf-8"), revisions, lastedited_iso, author_ids)) + out.writerow( + ( + padidnogroup.encode("utf-8"), + groupname.encode("utf-8"), + revisions, + lastedited_iso, + author_ids, + ) + ) count += 1 print("\nWrote {0} rows...".format(count), file=sys.stderr) - diff --git a/etherpump/commands/gethtml.py b/etherpump/commands/gethtml.py index 85d9f76..8646c3e 100644 --- a/etherpump/commands/gethtml.py +++ b/etherpump/commands/gethtml.py @@ -1,18 +1,27 @@ - -from argparse import ArgumentParser import json +from argparse import ArgumentParser +from urllib.error import HTTPError, URLError from urllib.parse import urlencode from urllib.request import urlopen -from urllib.error import HTTPError, URLError def main(args): p = ArgumentParser("calls the getHTML API function for the given padid") p.add_argument("padid", help="the padid") - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: .etherdump/settings.json", + ) p.add_argument("--showurl", default=False, action="store_true") - p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") - p.add_argument("--rev", type=int, default=None, help="revision, default: latest") + p.add_argument( + "--format", + default="text", + help="output format, can be: text, json; default: text", + ) + p.add_argument( + "--rev", type=int, default=None, help="revision, default: latest" + ) args = p.parse_args(args) with open(args.padinfo) as f: @@ -24,12 +33,12 @@ def main(args): data['padID'] = args.padid if args.rev != None: data['rev'] = args.rev - requesturl = apiurl+'getHTML?'+urlencode(data) + requesturl = apiurl + 'getHTML?' + urlencode(data) if args.showurl: - print (requesturl) + print(requesturl) else: results = json.load(urlopen(requesturl))['data'] if args.format == "json": - print (json.dumps(results)) + print(json.dumps(results)) else: - print (results['html'].encode("utf-8")) + print(results['html'].encode("utf-8")) diff --git a/etherpump/commands/gettext.py b/etherpump/commands/gettext.py index 6ac1c65..e66e81a 100644 --- a/etherpump/commands/gettext.py +++ b/etherpump/commands/gettext.py @@ -1,17 +1,27 @@ - +import json +import sys from argparse import ArgumentParser -import json, sys from urllib.parse import urlencode -from urllib.request import urlopen, URLError, HTTPError +from urllib.request import HTTPError, URLError, urlopen def main(args): p = ArgumentParser("calls the getText API function for the given padid") p.add_argument("padid", help="the padid") - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: .etherdump/settings.json", + ) p.add_argument("--showurl", default=False, action="store_true") - p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") - p.add_argument("--rev", type=int, default=None, help="revision, default: latest") + p.add_argument( + "--format", + default="text", + help="output format, can be: text, json; default: text", + ) + p.add_argument( + "--rev", type=int, default=None, help="revision, default: latest" + ) args = p.parse_args(args) with open(args.padinfo) as f: @@ -20,18 +30,18 @@ def main(args): # apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info) data = {} data['apikey'] = info['apikey'] - data['padID'] = args.padid # is utf-8 encoded + data['padID'] = args.padid # is utf-8 encoded if args.rev != None: data['rev'] = args.rev - requesturl = apiurl+'getText?'+urlencode(data) + requesturl = apiurl + 'getText?' + urlencode(data) if args.showurl: - print (requesturl) + print(requesturl) else: resp = urlopen(requesturl).read() resp = resp.decode("utf-8") results = json.loads(resp) if args.format == "json": - print (json.dumps(results)) + print(json.dumps(results)) else: if results['data']: sys.stdout.write(results['data']['text']) diff --git a/etherpump/commands/html5tidy.py b/etherpump/commands/html5tidy.py index d917e99..2f8881c 100644 --- a/etherpump/commands/html5tidy.py +++ b/etherpump/commands/html5tidy.py @@ -1,28 +1,31 @@ #!/usr/bin/env python3 -from html5lib import parse -import os, sys +import os +import sys from argparse import ArgumentParser -from xml.etree import ElementTree as ET +from xml.etree import ElementTree as ET + +from html5lib import parse def etree_indent(elem, level=0): - i = "\n" + level*" " + i = "\n" + level * " " if len(elem): if not elem.text or not elem.text.strip(): elem.text = i + " " if not elem.tail or not elem.tail.strip(): elem.tail = i for elem in elem: - etree_indent(elem, level+1) + etree_indent(elem, level + 1) if not elem.tail or not elem.tail.strip(): elem.tail = i else: if level and (not elem.tail or not elem.tail.strip()): elem.tail = i -def get_link_type (url): + +def get_link_type(url): lurl = url.lower() if lurl.endswith(".html") or lurl.endswith(".htm"): return "text/html" @@ -37,13 +40,17 @@ def get_link_type (url): elif lurl.endswith(".js") or lurl.endswith(".jsonp"): return "text/javascript" -def pluralize (x): + +def pluralize(x): if type(x) == list or type(x) == tuple: return x else: return (x,) -def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, indent=False): + +def html5tidy( + doc, charset="utf-8", title=None, scripts=None, links=None, indent=False +): if scripts: script_srcs = [x.attrib.get("src") for x in doc.findall(".//script")] for src in pluralize(scripts): @@ -56,21 +63,30 @@ def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, inden for elt in doc.findall(".//link"): href = elt.attrib.get("href") if href: - existinglinks[href] = elt + existinglinks[href] = elt for link in links: linktype = link.get("type") or get_link_type(link["href"]) if link["href"] in existinglinks: elt = existinglinks[link["href"]] elt.attrib["rel"] = link["rel"] else: - elt = ET.SubElement(doc.find(".//head"), "link", href=link["href"], rel=link["rel"]) + elt = ET.SubElement( + doc.find(".//head"), + "link", + href=link["href"], + rel=link["rel"], + ) if linktype: - elt.attrib["type"] = linktype + elt.attrib["type"] = linktype if "title" in link: elt.attrib["title"] = link["title"] if charset: - meta_charsets = [x.attrib.get("charset") for x in doc.findall(".//meta") if x.attrib.get("charset") != None] + meta_charsets = [ + x.attrib.get("charset") + for x in doc.findall(".//meta") + if x.attrib.get("charset") != None + ] if not meta_charsets: meta = ET.SubElement(doc.find(".//head"), "meta", charset=charset) @@ -79,33 +95,89 @@ def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, inden if not titleelt: titleelt = ET.SubElement(doc.find(".//head"), "title") titleelt.text = title - + if indent: etree_indent(doc) return doc -def main (args): + +def main(args): p = ArgumentParser("") p.add_argument("input", nargs="?", default=None) p.add_argument("--indent", default=False, action="store_true") - p.add_argument("--mogrify", default=False, action="store_true", help="modify file in place") - p.add_argument("--method", default="html", help="method, default: html, values: html, xml, text") + p.add_argument( + "--mogrify", + default=False, + action="store_true", + help="modify file in place", + ) + p.add_argument( + "--method", + default="html", + help="method, default: html, values: html, xml, text", + ) p.add_argument("--output", default=None, help="") p.add_argument("--title", default=None, help="ensure/add title tag in head") - p.add_argument("--charset", default="utf-8", help="ensure/add meta tag with charset") - p.add_argument("--script", action="append", default=[], help="ensure/add script tag") + p.add_argument( + "--charset", default="utf-8", help="ensure/add meta tag with charset" + ) + p.add_argument( + "--script", action="append", default=[], help="ensure/add script tag" + ) # s, see https://www.w3.org/TR/html5/links.html#links - p.add_argument("--stylesheet", action="append", default=[], help="ensure/add style link") - p.add_argument("--alternate", action="append", default=[], nargs="+", help="ensure/add alternate links (optionally followed by a title and type)") - p.add_argument("--next", action="append", default=[], nargs="+", help="ensure/add alternate link") - p.add_argument("--prev", action="append", default=[], nargs="+", help="ensure/add alternate link") - p.add_argument("--search", action="append", default=[], nargs="+", help="ensure/add search link") - p.add_argument("--rss", action="append", default=[], nargs="+", help="ensure/add alternate link of type application/rss+xml") - p.add_argument("--atom", action="append", default=[], nargs="+", help="ensure/add alternate link of type application/atom+xml") + p.add_argument( + "--stylesheet", + action="append", + default=[], + help="ensure/add style link", + ) + p.add_argument( + "--alternate", + action="append", + default=[], + nargs="+", + help="ensure/add alternate links (optionally followed by a title and type)", + ) + p.add_argument( + "--next", + action="append", + default=[], + nargs="+", + help="ensure/add alternate link", + ) + p.add_argument( + "--prev", + action="append", + default=[], + nargs="+", + help="ensure/add alternate link", + ) + p.add_argument( + "--search", + action="append", + default=[], + nargs="+", + help="ensure/add search link", + ) + p.add_argument( + "--rss", + action="append", + default=[], + nargs="+", + help="ensure/add alternate link of type application/rss+xml", + ) + p.add_argument( + "--atom", + action="append", + default=[], + nargs="+", + help="ensure/add alternate link of type application/atom+xml", + ) args = p.parse_args(args) links = [] - def add_links (links, items, rel, _type=None): + + def add_links(links, items, rel, _type=None): for href in items: d = {} d["rel"] = rel @@ -128,6 +200,7 @@ def main (args): d["href"] = href links.append(d) + for rel in ("stylesheet", "alternate", "next", "prev", "search"): add_links(links, getattr(args, rel), rel) for item in args.rss: @@ -144,27 +217,33 @@ def main (args): doc = parse(fin, treebuilder="etree", namespaceHTMLElements=False) if fin != sys.stdin: fin.close() - html5tidy(doc, scripts=args.script, links=links, title=args.title, indent=args.indent) + html5tidy( + doc, + scripts=args.script, + links=links, + title=args.title, + indent=args.indent, + ) # OUTPUT tmppath = None if args.output: fout = open(args.output, "w") elif args.mogrify: - tmppath = args.input+".tmp" + tmppath = args.input + ".tmp" fout = open(tmppath, "w") else: fout = sys.stdout - print (ET.tostring(doc, method=args.method, encoding="unicode"), file=fout) + print(ET.tostring(doc, method=args.method, encoding="unicode"), file=fout) if fout != sys.stdout: fout.close() if tmppath: - os.rename(args.input, args.input+"~") + os.rename(args.input, args.input + "~") os.rename(tmppath, args.input) -if __name__ == "__main__": +if __name__ == "__main__": main(sys.argv) diff --git a/etherpump/commands/index.py b/etherpump/commands/index.py index a7f0293..3608c14 100644 --- a/etherpump/commands/index.py +++ b/etherpump/commands/index.py @@ -1,16 +1,19 @@ - +import json +import os +import re +import sys +import time from argparse import ArgumentParser -import sys, json, re, os, time from datetime import datetime -import dateutil.parser - -from urllib.parse import urlparse, urlunparse, urlencode, quote -from urllib.request import urlopen, URLError, HTTPError - -from jinja2 import FileSystemLoader, Environment -from etherpump.commands.common import * from time import sleep +from urllib.parse import quote, urlencode, urlparse, urlunparse +from urllib.request import HTTPError, URLError, urlopen + +from jinja2 import Environment, FileSystemLoader + import dateutil.parser +from etherpump.commands.common import * + """ index: @@ -20,7 +23,8 @@ index: """ -def group (items, key=lambda x: x): + +def group(items, key=lambda x: x): """ returns a list of lists, of items grouped by a key function """ ret = [] keys = {} @@ -34,10 +38,12 @@ def group (items, key=lambda x: x): ret.append(keys[k]) return ret + # def base (x): # return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x) -def splitextlong (x): + +def splitextlong(x): """ split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz') """ m = re.search(r"^(.*?)(\..*)$", x) if m: @@ -45,20 +51,24 @@ def splitextlong (x): else: return x, '' -def base (x): + +def base(x): return splitextlong(x)[0] -def excerpt (t, chars=25): + +def excerpt(t, chars=25): if len(t) > chars: t = t[:chars] + "..." return t -def absurl (url, base=None): + +def absurl(url, base=None): if not url.startswith("http"): return base + url return url -def url_base (url): + +def url_base(url): (scheme, netloc, path, params, query, fragment) = urlparse(url) path, _ = os.path.split(path.lstrip("/")) ret = urlunparse((scheme, netloc, path, None, None, None)) @@ -66,45 +76,131 @@ def url_base (url): ret += "/" return ret -def datetimeformat (t, format='%Y-%m-%d %H:%M:%S'): + +def datetimeformat(t, format='%Y-%m-%d %H:%M:%S'): if type(t) == str: dt = dateutil.parser.parse(t) return dt.strftime(format) else: return time.strftime(format, time.localtime(t)) -def main (args): + +def main(args): p = ArgumentParser("Convert dumped files to a document via a template.") p.add_argument("input", nargs="+", help="Files to list (.meta.json files)") - p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in") - p.add_argument("--template", default="index.html", help="template name, built-ins include index.html, rss.xml; default: index.html") - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: ./.etherdump/settings.json") + p.add_argument( + "--templatepath", + default=None, + help="path to find templates, default: built-in", + ) + p.add_argument( + "--template", + default="index.html", + help="template name, built-ins include index.html, rss.xml; default: index.html", + ) + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: ./.etherdump/settings.json", + ) # p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)") - p.add_argument("--order", default="padid", help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid") - p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)") - p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)") - p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") + p.add_argument( + "--order", + default="padid", + help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid", + ) + p.add_argument( + "--reverse", + default=False, + action="store_true", + help="reverse order, default: False (reverse chrono)", + ) + p.add_argument( + "--limit", + type=int, + default=0, + help="limit to number of items, default: 0 (no limit)", + ) + p.add_argument( + "--skip", + default=None, + type=int, + help="skip this many items, default: None", + ) - p.add_argument("--content", default=False, action="store_true", help="rss: include (full) content tag, default: False") - p.add_argument("--link", default="diffhtml,html,text", help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text") - p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl") + p.add_argument( + "--content", + default=False, + action="store_true", + help="rss: include (full) content tag, default: False", + ) + p.add_argument( + "--link", + default="diffhtml,html,text", + help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text", + ) + p.add_argument( + "--linkbase", + default=None, + help="base url to use for links, default: try to use the feedurl", + ) p.add_argument("--output", default=None, help="output, default: stdout") - p.add_argument("--files", default=False, action="store_true", help="include files (experimental)") + p.add_argument( + "--files", + default=False, + action="store_true", + help="include files (experimental)", + ) pg = p.add_argument_group('template variables') - pg.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml") - pg.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url") - pg.add_argument("--title", default="etherpump", help="title for document or rss feed channel title, default: etherdump") - pg.add_argument("--description", default="", help="rss: channel description, default: empty") - pg.add_argument("--language", default="en-US", help="rss: feed language, default: en-US") - pg.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily") - pg.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1") - pg.add_argument("--generator", default="https://gitlab.com/activearchives/etherpump", help="generator, default: https://gitlab.com/activearchives/etherdump") - pg.add_argument("--timestamp", default=None, help="timestamp, default: now (e.g. 2015-12-01 12:30:00)") + pg.add_argument( + "--feedurl", + default="feed.xml", + help="rss: to use as feeds own (self) link, default: feed.xml", + ) + pg.add_argument( + "--siteurl", + default=None, + help="rss: to use as channel's site link, default: the etherpad url", + ) + pg.add_argument( + "--title", + default="etherpump", + help="title for document or rss feed channel title, default: etherdump", + ) + pg.add_argument( + "--description", + default="", + help="rss: channel description, default: empty", + ) + pg.add_argument( + "--language", default="en-US", help="rss: feed language, default: en-US" + ) + pg.add_argument( + "--updatePeriod", + default="daily", + help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily", + ) + pg.add_argument( + "--updateFrequency", + default=1, + type=int, + help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1", + ) + pg.add_argument( + "--generator", + default="https://gitlab.com/activearchives/etherpump", + help="generator, default: https://gitlab.com/activearchives/etherdump", + ) + pg.add_argument( + "--timestamp", + default=None, + help="timestamp, default: now (e.g. 2015-12-01 12:30:00)", + ) pg.add_argument("--next", default=None, help="next link, default: None)") pg.add_argument("--prev", default=None, help="prev link, default: None") @@ -129,17 +225,12 @@ def main (args): # Use "base" to strip (longest) extensions # inputs = group(inputs, base) - def wrappath (p): + def wrappath(p): path = "./{0}".format(p) ext = os.path.splitext(p)[1][1:] - return { - "url": path, - "path": path, - "code": 200, - "type": ext - } + return {"url": path, "path": path, "code": 200, "type": ext} - def metaforpaths (paths): + def metaforpaths(paths): ret = {} pid = base(paths[0]) ret['pad'] = ret['padid'] = pid @@ -149,7 +240,9 @@ def main (args): mtime = os.stat(p).st_mtime if lastedited == None or mtime > lastedited: lastedited = mtime - ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime("%Y-%m-%dT%H:%M:%S") + ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime( + "%Y-%m-%dT%H:%M:%S" + ) ret["lastedited_raw"] = mtime return ret @@ -169,7 +262,7 @@ def main (args): # else: # return metaforpaths(paths) - def fixdates (padmeta): + def fixdates(padmeta): d = dateutil.parser.parse(padmeta["lastedited_iso"]) padmeta["lastedited"] = d padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000") @@ -180,17 +273,21 @@ def main (args): pads = list(map(fixdates, pads)) args.pads = list(pads) - def could_have_base (x, y): - return x == y or (x.startswith(y) and x[len(y):].startswith(".")) + def could_have_base(x, y): + return x == y or (x.startswith(y) and x[len(y) :].startswith(".")) - def get_best_pad (x): + def get_best_pad(x): for pb in padbases: p = pads_by_base[pb] if could_have_base(x, pb): return p - def has_version (padinfo, path): - return [x for x in padinfo['versions'] if 'path' in x and x['path'] == "./"+path] + def has_version(padinfo, path): + return [ + x + for x in padinfo['versions'] + if 'path' in x and x['path'] == "./" + path + ] if args.files: inputs = args.input @@ -208,25 +305,33 @@ def main (args): # print ("PADBASES", file=sys.stderr) # for pb in padbases: # print (" ", pb, file=sys.stderr) - print ("pairing input files with pads", file=sys.stderr) + print("pairing input files with pads", file=sys.stderr) for x in inputs: # pair input with a pad if possible xbasename = os.path.basename(x) p = get_best_pad(xbasename) if p: if not has_version(p, x): - print ("Grouping file {0} with pad {1}".format(x, p['padid']), file=sys.stderr) + print( + "Grouping file {0} with pad {1}".format(x, p['padid']), + file=sys.stderr, + ) p['versions'].append(wrappath(x)) else: - print ("Skipping existing version {0} ({1})...".format(x, p['padid']), file=sys.stderr) + print( + "Skipping existing version {0} ({1})...".format( + x, p['padid'] + ), + file=sys.stderr, + ) removelist.append(x) # Removed Matches files for x in removelist: inputs.remove(x) - print ("Remaining files:", file=sys.stderr) + print("Remaining files:", file=sys.stderr) for x in inputs: - print (x, file=sys.stderr) - print (file=sys.stderr) + print(x, file=sys.stderr) + print(file=sys.stderr) # Add "fake" pads for remaining files for x in inputs: args.pads.append(metaforpaths([x])) @@ -242,7 +347,9 @@ def main (args): # order items & apply limit if args.order == "lastedited": - args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=args.reverse) + args.pads.sort( + key=lambda x: x.get("lastedited_iso"), reverse=args.reverse + ) elif args.order == "pad": args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse) elif args.order == "padid": @@ -250,12 +357,14 @@ def main (args): elif args.order == "revisions": args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse) elif args.order == "authors": - args.pads.sort(key=lambda x: len(x.get("authors")), reverse=args.reverse) + args.pads.sort( + key=lambda x: len(x.get("authors")), reverse=args.reverse + ) else: raise Exception("That ordering is not implemented!") if args.limit: - args.pads = args.pads[:args.limit] + args.pads = args.pads[: args.limit] # add versions_by_type, add in full text # add link (based on args.link) @@ -272,7 +381,7 @@ def main (args): if "text" in versions_by_type: try: - with open (versions_by_type["text"]["path"]) as f: + with open(versions_by_type["text"]["path"]) as f: p["text"] = f.read() except FileNotFoundError: p['text'] = '' @@ -289,6 +398,6 @@ def main (args): if args.output: with open(args.output, "w") as f: - print (template.render(vars(args)), file=f) + print(template.render(vars(args)), file=f) else: - print (template.render(vars(args))) + print(template.render(vars(args))) diff --git a/etherpump/commands/init.py b/etherpump/commands/init.py index a8d2b2b..04d84c0 100644 --- a/etherpump/commands/init.py +++ b/etherpump/commands/init.py @@ -1,19 +1,19 @@ - +import json +import os +import sys from argparse import ArgumentParser +from urllib.parse import urlencode, urlparse, urlunparse +from urllib.request import HTTPError, URLError, urlopen -from urllib.parse import urlparse, urlunparse, urlencode -from urllib.request import urlopen, URLError, HTTPError - -import json, os, sys def get_api(url, cmd=None, data=None, verbose=False): try: - useurl = url+cmd + useurl = url + cmd if data: - useurl += "?"+urlencode(data) + useurl += "?" + urlencode(data) # data['apikey'] = "7c8faa070c97f83d8f705c935a32d5141f89cbaa2158042fa92e8ddad5dbc5e1" if verbose: - print ("trying", useurl, file=sys.stderr) + print("trying", useurl, file=sys.stderr) resp = urlopen(useurl).read() resp = resp.decode("utf-8") resp = json.loads(resp) @@ -21,11 +21,11 @@ def get_api(url, cmd=None, data=None, verbose=False): return resp except ValueError as e: if verbose: - print (" ValueError", e, file=sys.stderr) + print(" ValueError", e, file=sys.stderr) return except HTTPError as e: if verbose: - print (" HTTPError", e, file=sys.stderr) + print(" HTTPError", e, file=sys.stderr) if e.code == 401: # Unauthorized is how the API responds to an incorrect API key return {"code": 401, "message": e} @@ -34,7 +34,8 @@ def get_api(url, cmd=None, data=None, verbose=False): # # print ("returning", resp, file=sys.stderr) # return resp -def tryapiurl (url, verbose=False): + +def tryapiurl(url, verbose=False): """ Try to use url as api, correcting if possible. Returns corrected / normalized URL, or None if not possible @@ -47,22 +48,30 @@ def tryapiurl (url, verbose=False): params, query, fragment = ("", "", "") path = path.strip("/") # 1. try directly... - apiurl = urlunparse((scheme, netloc, path, params, query, fragment))+"/" + apiurl = ( + urlunparse((scheme, netloc, path, params, query, fragment)) + "/" + ) if get_api(apiurl, "listAllPads", verbose=verbose): return apiurl # 2. try with += api/1.2.9 - path = os.path.join(path, "api", "1.2.9")+"/" + path = os.path.join(path, "api", "1.2.9") + "/" apiurl = urlunparse((scheme, netloc, path, params, query, fragment)) if get_api(apiurl, "listAllPads", verbose=verbose): return apiurl # except ValueError as e: # print ("ValueError", e, file=sys.stderr) except URLError as e: - print ("URLError", e, file=sys.stderr) + print("URLError", e, file=sys.stderr) + def main(args): p = ArgumentParser("initialize an etherpump folder") - p.add_argument("arg", nargs="*", default=[], help="optional positional args: path etherpadurl") + p.add_argument( + "arg", + nargs="*", + default=[], + help="optional positional args: path etherpadurl", + ) p.add_argument("--path", default=None, help="path to initialize") p.add_argument("--padurl", default=None, help="") p.add_argument("--apikey", default=None, help="") @@ -70,7 +79,6 @@ def main(args): p.add_argument("--reinit", default=False, action="store_true", help="") args = p.parse_args(args) - path = args.path if path == None and len(args.arg): path = args.arg[0] @@ -89,7 +97,9 @@ def main(args): with open(padinfopath) as f: padinfo = json.load(f) if not args.reinit: - print ("Folder is already initialized. Use --reinit to reset settings.") + print( + "Folder is already initialized. Use --reinit to reset settings." + ) sys.exit(0) except IOError: pass @@ -100,7 +110,7 @@ def main(args): apiurl = args.padurl while True: if apiurl: - apiurl = tryapiurl(apiurl,verbose=args.verbose) + apiurl = tryapiurl(apiurl, verbose=args.verbose) if apiurl: # print ("Got APIURL: {0}".format(apiurl)) break @@ -109,13 +119,18 @@ def main(args): apikey = args.apikey while True: if apikey: - resp = get_api(apiurl, "listAllPads", {"apikey": apikey}, verbose=args.verbose) + resp = get_api( + apiurl, "listAllPads", {"apikey": apikey}, verbose=args.verbose + ) if resp and resp["code"] == 0: # print ("GOOD") break else: - print ("bad") - print ("The APIKEY is the contents of the file APIKEY.txt in the etherpad folder", file=sys.stderr) + print("bad") + print( + "The APIKEY is the contents of the file APIKEY.txt in the etherpad folder", + file=sys.stderr, + ) apikey = input("Please paste the APIKEY: ").strip() padinfo["apikey"] = apikey diff --git a/etherpump/commands/join.py b/etherpump/commands/join.py index 7de9195..22ad9b3 100644 --- a/etherpump/commands/join.py +++ b/etherpump/commands/join.py @@ -1,11 +1,13 @@ - +import json +import os +import re from argparse import ArgumentParser -import json, os, re +from urllib.error import HTTPError, URLError from urllib.parse import urlencode from urllib.request import urlopen -from urllib.error import HTTPError, URLError -def group (items, key=lambda x: x): + +def group(items, key=lambda x: x): ret = [] keys = {} for item in items: @@ -18,6 +20,7 @@ def group (items, key=lambda x: x): ret.append(keys[k]) return ret + def main(args): p = ArgumentParser("") p.add_argument("input", nargs="+", help="filenames") @@ -28,10 +31,11 @@ def main(args): inputs = [x for x in inputs if not os.path.isdir(x)] - def base (x): + def base(x): return re.sub(r"(\.html)|(\.diff\.html)|(\.meta\.json)|(\.txt)$", "", x) - #from pprint import pprint - #pprint() + + # from pprint import pprint + # pprint() gg = group(inputs, base) for items in gg: itembase = base(items[0]) @@ -41,5 +45,5 @@ def main(args): pass for i in items: newloc = os.path.join(itembase, i) - print ("'{0}' => '{1}'".format(i, newloc)) + print("'{0}' => '{1}'".format(i, newloc)) os.rename(i, newloc) diff --git a/etherpump/commands/list.py b/etherpump/commands/list.py index 9a686ca..9ba715d 100644 --- a/etherpump/commands/list.py +++ b/etherpump/commands/list.py @@ -1,31 +1,40 @@ - -from argparse import ArgumentParser import json import sys -from etherpump.commands.common import getjson -from urllib.parse import urlparse, urlunparse, urlencode -from urllib.request import urlopen, URLError, HTTPError +from argparse import ArgumentParser +from urllib.parse import urlencode, urlparse, urlunparse +from urllib.request import HTTPError, URLError, urlopen -def main (args): +from etherpump.commands.common import getjson + + +def main(args): p = ArgumentParser("call listAllPads and print the results") - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: .etherdump/settings.json", + ) p.add_argument("--showurl", default=False, action="store_true") - p.add_argument("--format", default="lines", help="output format: lines, json; default lines") + p.add_argument( + "--format", + default="lines", + help="output format: lines, json; default lines", + ) args = p.parse_args(args) with open(args.padinfo) as f: info = json.load(f) - apiurl = info.get("apiurl") + apiurl = info.get("apiurl") # apiurl = {0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info) data = {} data['apikey'] = info['apikey'] - requesturl = apiurl+'listAllPads?'+urlencode(data) + requesturl = apiurl + 'listAllPads?' + urlencode(data) if args.showurl: - print (requesturl) + print(requesturl) else: results = getjson(requesturl)['data']['padIDs'] if args.format == "json": - print (json.dumps(results)) + print(json.dumps(results)) else: for r in results: - print (r) + print(r) diff --git a/etherpump/commands/listauthors.py b/etherpump/commands/listauthors.py index 4f35d7e..9f78223 100644 --- a/etherpump/commands/listauthors.py +++ b/etherpump/commands/listauthors.py @@ -1,17 +1,24 @@ - -from argparse import ArgumentParser import json +from argparse import ArgumentParser +from urllib.error import HTTPError, URLError from urllib.parse import urlencode from urllib.request import urlopen -from urllib.error import HTTPError, URLError def main(args): p = ArgumentParser("call listAuthorsOfPad for the padid") p.add_argument("padid", help="the padid") - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: .etherdump/settings.json", + ) p.add_argument("--showurl", default=False, action="store_true") - p.add_argument("--format", default="lines", help="output format, can be: lines, json; default: lines") + p.add_argument( + "--format", + default="lines", + help="output format, can be: lines, json; default: lines", + ) args = p.parse_args(args) with open(args.padinfo) as f: @@ -20,13 +27,13 @@ def main(args): data = {} data['apikey'] = info['apikey'] data['padID'] = args.padid.encode("utf-8") - requesturl = apiurl+'listAuthorsOfPad?'+urlencode(data) + requesturl = apiurl + 'listAuthorsOfPad?' + urlencode(data) if args.showurl: - print (requesturl) + print(requesturl) else: results = json.load(urlopen(requesturl))['data']['authorIDs'] if args.format == "json": - print (json.dumps(results)) + print(json.dumps(results)) else: for r in results: - print (r.encode("utf-8")) + print(r.encode("utf-8")) diff --git a/etherpump/commands/publication.py b/etherpump/commands/publication.py index 81f7d0d..65c3d4e 100644 --- a/etherpump/commands/publication.py +++ b/etherpump/commands/publication.py @@ -1,17 +1,20 @@ - +import json +import os +import re +import sys +import time from argparse import ArgumentParser -import sys, json, re, os, time from datetime import datetime +from time import sleep +from urllib.parse import quote, urlencode, urlparse, urlunparse +from urllib.request import HTTPError, URLError, urlopen + +from jinja2 import Environment, FileSystemLoader + import dateutil.parser import pypandoc - -from urllib.parse import urlparse, urlunparse, urlencode, quote -from urllib.request import urlopen, URLError, HTTPError - -from jinja2 import FileSystemLoader, Environment from etherpump.commands.common import * -from time import sleep -import dateutil.parser + """ publication: @@ -21,7 +24,8 @@ publication: """ -def group (items, key=lambda x: x): + +def group(items, key=lambda x: x): """ returns a list of lists, of items grouped by a key function """ ret = [] keys = {} @@ -35,10 +39,12 @@ def group (items, key=lambda x: x): ret.append(keys[k]) return ret + # def base (x): # return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x) -def splitextlong (x): + +def splitextlong(x): """ split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz') """ m = re.search(r"^(.*?)(\..*)$", x) if m: @@ -46,20 +52,24 @@ def splitextlong (x): else: return x, '' -def base (x): + +def base(x): return splitextlong(x)[0] -def excerpt (t, chars=25): + +def excerpt(t, chars=25): if len(t) > chars: t = t[:chars] + "..." return t -def absurl (url, base=None): + +def absurl(url, base=None): if not url.startswith("http"): return base + url return url -def url_base (url): + +def url_base(url): (scheme, netloc, path, params, query, fragment) = urlparse(url) path, _ = os.path.split(path.lstrip("/")) ret = urlunparse((scheme, netloc, path, None, None, None)) @@ -67,45 +77,131 @@ def url_base (url): ret += "/" return ret -def datetimeformat (t, format='%Y-%m-%d %H:%M:%S'): + +def datetimeformat(t, format='%Y-%m-%d %H:%M:%S'): if type(t) == str: dt = dateutil.parser.parse(t) return dt.strftime(format) else: return time.strftime(format, time.localtime(t)) -def main (args): + +def main(args): p = ArgumentParser("Convert dumped files to a document via a template.") p.add_argument("input", nargs="+", help="Files to list (.meta.json files)") - p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in") - p.add_argument("--template", default="publication.html", help="template name, built-ins include publication.html; default: publication.html") - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: ./.etherdump/settings.json") + p.add_argument( + "--templatepath", + default=None, + help="path to find templates, default: built-in", + ) + p.add_argument( + "--template", + default="publication.html", + help="template name, built-ins include publication.html; default: publication.html", + ) + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: ./.etherdump/settings.json", + ) # p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)") - p.add_argument("--order", default="padid", help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid") - p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)") - p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)") - p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") + p.add_argument( + "--order", + default="padid", + help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid", + ) + p.add_argument( + "--reverse", + default=False, + action="store_true", + help="reverse order, default: False (reverse chrono)", + ) + p.add_argument( + "--limit", + type=int, + default=0, + help="limit to number of items, default: 0 (no limit)", + ) + p.add_argument( + "--skip", + default=None, + type=int, + help="skip this many items, default: None", + ) - p.add_argument("--content", default=False, action="store_true", help="rss: include (full) content tag, default: False") - p.add_argument("--link", default="diffhtml,html,text", help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text") - p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl") + p.add_argument( + "--content", + default=False, + action="store_true", + help="rss: include (full) content tag, default: False", + ) + p.add_argument( + "--link", + default="diffhtml,html,text", + help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text", + ) + p.add_argument( + "--linkbase", + default=None, + help="base url to use for links, default: try to use the feedurl", + ) p.add_argument("--output", default=None, help="output, default: stdout") - p.add_argument("--files", default=False, action="store_true", help="include files (experimental)") + p.add_argument( + "--files", + default=False, + action="store_true", + help="include files (experimental)", + ) pg = p.add_argument_group('template variables') - pg.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml") - pg.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url") - pg.add_argument("--title", default="etherpump", help="title for document or rss feed channel title, default: etherdump") - pg.add_argument("--description", default="", help="rss: channel description, default: empty") - pg.add_argument("--language", default="en-US", help="rss: feed language, default: en-US") - pg.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily") - pg.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1") - pg.add_argument("--generator", default="https://gitlab.com/activearchives/etherpump", help="generator, default: https://gitlab.com/activearchives/etherdump") - pg.add_argument("--timestamp", default=None, help="timestamp, default: now (e.g. 2015-12-01 12:30:00)") + pg.add_argument( + "--feedurl", + default="feed.xml", + help="rss: to use as feeds own (self) link, default: feed.xml", + ) + pg.add_argument( + "--siteurl", + default=None, + help="rss: to use as channel's site link, default: the etherpad url", + ) + pg.add_argument( + "--title", + default="etherpump", + help="title for document or rss feed channel title, default: etherdump", + ) + pg.add_argument( + "--description", + default="", + help="rss: channel description, default: empty", + ) + pg.add_argument( + "--language", default="en-US", help="rss: feed language, default: en-US" + ) + pg.add_argument( + "--updatePeriod", + default="daily", + help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily", + ) + pg.add_argument( + "--updateFrequency", + default=1, + type=int, + help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1", + ) + pg.add_argument( + "--generator", + default="https://gitlab.com/activearchives/etherpump", + help="generator, default: https://gitlab.com/activearchives/etherdump", + ) + pg.add_argument( + "--timestamp", + default=None, + help="timestamp, default: now (e.g. 2015-12-01 12:30:00)", + ) pg.add_argument("--next", default=None, help="next link, default: None)") pg.add_argument("--prev", default=None, help="prev link, default: None") @@ -130,17 +226,12 @@ def main (args): # Use "base" to strip (longest) extensions # inputs = group(inputs, base) - def wrappath (p): + def wrappath(p): path = "./{0}".format(p) ext = os.path.splitext(p)[1][1:] - return { - "url": path, - "path": path, - "code": 200, - "type": ext - } + return {"url": path, "path": path, "code": 200, "type": ext} - def metaforpaths (paths): + def metaforpaths(paths): ret = {} pid = base(paths[0]) ret['pad'] = ret['padid'] = pid @@ -150,7 +241,9 @@ def main (args): mtime = os.stat(p).st_mtime if lastedited == None or mtime > lastedited: lastedited = mtime - ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime("%Y-%m-%dT%H:%M:%S") + ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime( + "%Y-%m-%dT%H:%M:%S" + ) ret["lastedited_raw"] = mtime return ret @@ -170,7 +263,7 @@ def main (args): # else: # return metaforpaths(paths) - def fixdates (padmeta): + def fixdates(padmeta): d = dateutil.parser.parse(padmeta["lastedited_iso"]) padmeta["lastedited"] = d padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000") @@ -181,17 +274,21 @@ def main (args): pads = list(map(fixdates, pads)) args.pads = list(pads) - def could_have_base (x, y): - return x == y or (x.startswith(y) and x[len(y):].startswith(".")) + def could_have_base(x, y): + return x == y or (x.startswith(y) and x[len(y) :].startswith(".")) - def get_best_pad (x): + def get_best_pad(x): for pb in padbases: p = pads_by_base[pb] if could_have_base(x, pb): return p - def has_version (padinfo, path): - return [x for x in padinfo['versions'] if 'path' in x and x['path'] == "./"+path] + def has_version(padinfo, path): + return [ + x + for x in padinfo['versions'] + if 'path' in x and x['path'] == "./" + path + ] if args.files: inputs = args.input @@ -209,25 +306,33 @@ def main (args): # print ("PADBASES", file=sys.stderr) # for pb in padbases: # print (" ", pb, file=sys.stderr) - print ("pairing input files with pads", file=sys.stderr) + print("pairing input files with pads", file=sys.stderr) for x in inputs: # pair input with a pad if possible xbasename = os.path.basename(x) p = get_best_pad(xbasename) if p: if not has_version(p, x): - print ("Grouping file {0} with pad {1}".format(x, p['padid']), file=sys.stderr) + print( + "Grouping file {0} with pad {1}".format(x, p['padid']), + file=sys.stderr, + ) p['versions'].append(wrappath(x)) else: - print ("Skipping existing version {0} ({1})...".format(x, p['padid']), file=sys.stderr) + print( + "Skipping existing version {0} ({1})...".format( + x, p['padid'] + ), + file=sys.stderr, + ) removelist.append(x) # Removed Matches files for x in removelist: inputs.remove(x) - print ("Remaining files:", file=sys.stderr) + print("Remaining files:", file=sys.stderr) for x in inputs: - print (x, file=sys.stderr) - print (file=sys.stderr) + print(x, file=sys.stderr) + print(file=sys.stderr) # Add "fake" pads for remaining files for x in inputs: args.pads.append(metaforpaths([x])) @@ -243,7 +348,9 @@ def main (args): # order items & apply limit if args.order == "lastedited": - args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=args.reverse) + args.pads.sort( + key=lambda x: x.get("lastedited_iso"), reverse=args.reverse + ) elif args.order == "pad": args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse) elif args.order == "padid": @@ -251,17 +358,20 @@ def main (args): elif args.order == "revisions": args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse) elif args.order == "authors": - args.pads.sort(key=lambda x: len(x.get("authors")), reverse=args.reverse) + args.pads.sort( + key=lambda x: len(x.get("authors")), reverse=args.reverse + ) elif args.order == "custom": - # TODO: make this list non-static, but a variable that can be given from the CLI + # TODO: make this list non-static, but a variable that can be given from the CLI customorder = [ 'nooo.relearn.preamble', 'nooo.relearn.activating.the.archive', 'nooo.relearn.call.for.proposals', 'nooo.relearn.call.for.proposals-proposal-footnote', - 'nooo.relearn.colophon'] + 'nooo.relearn.colophon', + ] order = [] for x in customorder: for pad in args.pads: @@ -272,7 +382,7 @@ def main (args): raise Exception("That ordering is not implemented!") if args.limit: - args.pads = args.pads[:args.limit] + args.pads = args.pads[: args.limit] # add versions_by_type, add in full text # add link (based on args.link) @@ -289,7 +399,7 @@ def main (args): if "text" in versions_by_type: # try: - with open (versions_by_type["text"]["path"]) as f: + with open(versions_by_type["text"]["path"]) as f: content = f.read() # print('content:', content) # [Relearn] Add pandoc command here? @@ -297,7 +407,7 @@ def main (args): # print('html:', html) p["text"] = html # except FileNotFoundError: - # p['text'] = 'ERROR' + # p['text'] = 'ERROR' # ADD IN LINK TO PAD AS "link" for v in linkversions: @@ -312,6 +422,6 @@ def main (args): if args.output: with open(args.output, "w") as f: - print (template.render(vars(args)), file=f) + print(template.render(vars(args)), file=f) else: - print (template.render(vars(args))) + print(template.render(vars(args))) diff --git a/etherpump/commands/pull.py b/etherpump/commands/pull.py index a50d0bb..875667b 100644 --- a/etherpump/commands/pull.py +++ b/etherpump/commands/pull.py @@ -1,17 +1,19 @@ - +import json +import os +import re +import sys from argparse import ArgumentParser -import sys, json, re, os from datetime import datetime +from fnmatch import fnmatch +from time import sleep +from urllib.parse import quote, urlencode +from urllib.request import HTTPError, URLError, urlopen +from xml.etree import ElementTree as ET -from urllib.parse import urlencode, quote -from urllib.request import urlopen, URLError, HTTPError +import html5lib from etherpump.commands.common import * -from time import sleep from etherpump.commands.html5tidy import html5tidy -import html5lib -from xml.etree import ElementTree as ET -from fnmatch import fnmatch # debugging # import ElementTree as ET @@ -28,43 +30,144 @@ use/prefer public interfaces ? (export functions) """ -def try_deleting (files): + +def try_deleting(files): for f in files: try: os.remove(f) except OSError as e: pass -def main (args): - p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)") + +def main(args): + p = ArgumentParser( + "Check for pads that have changed since last sync (according to .meta.json)" + ) p.add_argument("padid", nargs="*", default=[]) - p.add_argument("--glob", default=False, help="download pads matching a glob pattern") + p.add_argument( + "--glob", default=False, help="download pads matching a glob pattern" + ) - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherpump/settings.json") - p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)") - p.add_argument("--pub", default="p", help="folder to store files for public pads, default: p") - p.add_argument("--group", default="g", help="folder to store files for group pads, default: g") - p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") - p.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False") - p.add_argument("--text", default=False, action="store_true", help="download text to PADID.txt, default: False") - p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False") - p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.diff.html, default: False") - p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False") - p.add_argument("--folder", default=False, action="store_true", help="dump files in a folder named PADID (meta, text, html, dhtml), default: False") - p.add_argument("--output", default=False, action="store_true", help="output changed padids on stdout") - p.add_argument("--force", default=False, action="store_true", help="reload, even if revisions count matches previous") - p.add_argument("--no-raw-ext", default=False, action="store_true", help="save plain text as padname with no (additional) extension") - p.add_argument("--fix-names", default=False, action="store_true", help="normalize padid's (no spaces, special control chars) for use in file names") + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: .etherpump/settings.json", + ) + p.add_argument( + "--zerorevs", + default=False, + action="store_true", + help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)", + ) + p.add_argument( + "--pub", + default="p", + help="folder to store files for public pads, default: p", + ) + p.add_argument( + "--group", + default="g", + help="folder to store files for group pads, default: g", + ) + p.add_argument( + "--skip", + default=None, + type=int, + help="skip this many items, default: None", + ) + p.add_argument( + "--meta", + default=False, + action="store_true", + help="download meta to PADID.meta.json, default: False", + ) + p.add_argument( + "--text", + default=False, + action="store_true", + help="download text to PADID.txt, default: False", + ) + p.add_argument( + "--html", + default=False, + action="store_true", + help="download html to PADID.html, default: False", + ) + p.add_argument( + "--dhtml", + default=False, + action="store_true", + help="download dhtml to PADID.diff.html, default: False", + ) + p.add_argument( + "--all", + default=False, + action="store_true", + help="download all files (meta, text, html, dhtml), default: False", + ) + p.add_argument( + "--folder", + default=False, + action="store_true", + help="dump files in a folder named PADID (meta, text, html, dhtml), default: False", + ) + p.add_argument( + "--output", + default=False, + action="store_true", + help="output changed padids on stdout", + ) + p.add_argument( + "--force", + default=False, + action="store_true", + help="reload, even if revisions count matches previous", + ) + p.add_argument( + "--no-raw-ext", + default=False, + action="store_true", + help="save plain text as padname with no (additional) extension", + ) + p.add_argument( + "--fix-names", + default=False, + action="store_true", + help="normalize padid's (no spaces, special control chars) for use in file names", + ) - p.add_argument("--filter-ext", default=None, help="filter pads by extension") + p.add_argument( + "--filter-ext", default=None, help="filter pads by extension" + ) - p.add_argument("--css", default="/styles.css", help="add css url to output pages, default: /styles.css") - p.add_argument("--script", default="/versions.js", help="add script url to output pages, default: /versions.js") + p.add_argument( + "--css", + default="/styles.css", + help="add css url to output pages, default: /styles.css", + ) + p.add_argument( + "--script", + default="/versions.js", + help="add script url to output pages, default: /versions.js", + ) - p.add_argument("--nopublish", default="__NOPUBLISH__", help="no publish magic word, default: __NOPUBLISH__") - p.add_argument("--publish", default="__PUBLISH__", help="the publish magic word, default: __PUBLISH__") - p.add_argument("--publish-opt-in", default=False, action="store_true", help="ensure `--publish` is honoured instead of `--nopublish`") + p.add_argument( + "--nopublish", + default="__NOPUBLISH__", + help="no publish magic word, default: __NOPUBLISH__", + ) + p.add_argument( + "--publish", + default="__PUBLISH__", + help="the publish magic word, default: __PUBLISH__", + ) + p.add_argument( + "--publish-opt-in", + default=False, + action="store_true", + help="ensure `--publish` is honoured instead of `--nopublish`", + ) args = p.parse_args(args) @@ -79,16 +182,20 @@ def main (args): if args.padid: padids = args.padid elif args.glob: - padids = getjson(info['localapiurl']+'listAllPads?'+urlencode(data))['data']['padIDs'] + padids = getjson( + info['localapiurl'] + 'listAllPads?' + urlencode(data) + )['data']['padIDs'] padids = [x for x in padids if fnmatch(x, args.glob)] else: - padids = getjson(info['localapiurl']+'listAllPads?'+urlencode(data))['data']['padIDs'] + padids = getjson( + info['localapiurl'] + 'listAllPads?' + urlencode(data) + )['data']['padIDs'] padids.sort() numpads = len(padids) # maxmsglen = 0 count = 0 for i, padid in enumerate(padids): - if args.skip != None and i 3: - print ("Too many failures ({0}), skipping".format(padid), file=sys.stderr) - skip=True + print( + "Too many failures ({0}), skipping".format(padid), + file=sys.stderr, + ) + skip = True break else: sleep(3) except TypeError as e: - print ("Type Error loading pad {0} (phantom pad?), skipping".format(padid), file=sys.stderr) - skip=True + print( + "Type Error loading pad {0} (phantom pad?), skipping".format( + padid + ), + file=sys.stderr, + ) + skip = True break if skip: @@ -159,7 +292,7 @@ def main (args): count += 1 if args.output: - print (padid) + print(padid) if args.all or (args.meta or args.text or args.html or args.dhtml): try: @@ -168,7 +301,7 @@ def main (args): pass if args.all or args.text: - text = getjson(info['localapiurl']+'getText?'+urlencode(data)) + text = getjson(info['localapiurl'] + 'getText?' + urlencode(data)) ver = {"type": "text"} versions.append(ver) ver["code"] = text["_code"] @@ -180,17 +313,31 @@ def main (args): ########################################## if args.nopublish and args.nopublish in text: # NEED TO PURGE ANY EXISTING DOCS - try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json")) + try_deleting( + ( + p + raw_ext, + p + ".raw.html", + p + ".diff.html", + p + ".meta.json", + ) + ) continue ########################################## ## ENFORCE __PUBLISH__ MAGIC WORD ########################################## if args.publish_opt_in and args.publish not in text: - try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json")) + try_deleting( + ( + p + raw_ext, + p + ".raw.html", + p + ".diff.html", + p + ".meta.json", + ) + ) continue - ver["path"] = p+raw_ext + ver["path"] = p + raw_ext ver["url"] = quote(ver["path"]) with open(ver["path"], "w") as f: f.write(text) @@ -199,38 +346,86 @@ def main (args): links = [] if args.css: - links.append({"href":args.css, "rel":"stylesheet"}) + links.append({"href": args.css, "rel": "stylesheet"}) # todo, make this process reflect which files actually were made versionbaseurl = quote(padid) - links.append({"href":versions[0]["url"], "rel":"alternate", "type":"text/html", "title":"Etherpad"}) + links.append( + { + "href": versions[0]["url"], + "rel": "alternate", + "type": "text/html", + "title": "Etherpad", + } + ) if args.all or args.text: - links.append({"href":versionbaseurl+raw_ext, "rel":"alternate", "type":"text/plain", "title":"Plain text"}) + links.append( + { + "href": versionbaseurl + raw_ext, + "rel": "alternate", + "type": "text/plain", + "title": "Plain text", + } + ) if args.all or args.html: - links.append({"href":versionbaseurl+".raw.html", "rel":"alternate", "type":"text/html", "title":"HTML"}) + links.append( + { + "href": versionbaseurl + ".raw.html", + "rel": "alternate", + "type": "text/html", + "title": "HTML", + } + ) if args.all or args.dhtml: - links.append({"href":versionbaseurl+".diff.html", "rel":"alternate", "type":"text/html", "title":"HTML with author colors"}) + links.append( + { + "href": versionbaseurl + ".diff.html", + "rel": "alternate", + "type": "text/html", + "title": "HTML with author colors", + } + ) if args.all or args.meta: - links.append({"href":versionbaseurl+".meta.json", "rel":"alternate", "type":"application/json", "title":"Meta data"}) + links.append( + { + "href": versionbaseurl + ".meta.json", + "rel": "alternate", + "type": "application/json", + "title": "Meta data", + } + ) # links.append({"href":"/", "rel":"search", "type":"text/html", "title":"Index"}) if args.all or args.dhtml: data['startRev'] = "0" - html = getjson(info['localapiurl']+'createDiffHTML?'+urlencode(data)) + html = getjson( + info['localapiurl'] + 'createDiffHTML?' + urlencode(data) + ) ver = {"type": "diffhtml"} versions.append(ver) ver["code"] = html["_code"] if html["_code"] == 200: try: html = html['data']['html'] - ver["path"] = p+".diff.html" + ver["path"] = p + ".diff.html" ver["url"] = quote(ver["path"]) # doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False) - doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False) - html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links) + doc = html5lib.parse( + html, treebuilder="etree", namespaceHTMLElements=False + ) + html5tidy( + doc, + indent=True, + title=padid, + scripts=args.script, + links=links, + ) with open(ver["path"], "w") as f: # f.write(html.encode("utf-8")) - print(ET.tostring(doc, method="html", encoding="unicode"), file=f) + print( + ET.tostring(doc, method="html", encoding="unicode"), + file=f, + ) except TypeError: # Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file! ver["message"] = html["message"] @@ -239,19 +434,30 @@ def main (args): # Process text, html, dhtml, all options if args.all or args.html: - html = getjson(info['localapiurl']+'getHTML?'+urlencode(data)) + html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data)) ver = {"type": "html"} versions.append(ver) ver["code"] = html["_code"] if html["_code"] == 200: html = html['data']['html'] - ver["path"] = p+".raw.html" + ver["path"] = p + ".raw.html" ver["url"] = quote(ver["path"]) - doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False) - html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links) + doc = html5lib.parse( + html, treebuilder="etree", namespaceHTMLElements=False + ) + html5tidy( + doc, + indent=True, + title=padid, + scripts=args.script, + links=links, + ) with open(ver["path"], "w") as f: # f.write(html.encode("utf-8")) - print (ET.tostring(doc, method="html", encoding="unicode"), file=f) + print( + ET.tostring(doc, method="html", encoding="unicode"), + file=f, + ) # output meta if args.all or args.meta: diff --git a/etherpump/commands/revisionscount.py b/etherpump/commands/revisionscount.py index 8808501..cee0e0e 100644 --- a/etherpump/commands/revisionscount.py +++ b/etherpump/commands/revisionscount.py @@ -1,14 +1,18 @@ - -from argparse import ArgumentParser import json +from argparse import ArgumentParser +from urllib.error import HTTPError, URLError from urllib.parse import urlencode from urllib.request import urlopen -from urllib.error import HTTPError, URLError + def main(args): p = ArgumentParser("call getRevisionsCount for the given padid") p.add_argument("padid", help="the padid") - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: .etherdump/settings.json", + ) p.add_argument("--showurl", default=False, action="store_true") args = p.parse_args(args) @@ -18,9 +22,9 @@ def main(args): data = {} data['apikey'] = info['apikey'] data['padID'] = args.padid.encode("utf-8") - requesturl = apiurl+'getRevisionsCount?'+urlencode(data) + requesturl = apiurl + 'getRevisionsCount?' + urlencode(data) if args.showurl: - print (requesturl) + print(requesturl) else: results = json.load(urlopen(requesturl))['data']['revisions'] - print (results) + print(results) diff --git a/etherpump/commands/sethtml.py b/etherpump/commands/sethtml.py index 9180259..5bf6835 100644 --- a/etherpump/commands/sethtml.py +++ b/etherpump/commands/sethtml.py @@ -1,39 +1,60 @@ - +import json +import sys from argparse import ArgumentParser -import json, sys +from urllib.error import HTTPError, URLError from urllib.parse import urlencode from urllib.request import urlopen -from urllib.error import HTTPError, URLError + import requests +LIMIT_BYTES = 100 * 1000 -LIMIT_BYTES = 100*1000 def main(args): p = ArgumentParser("calls the setHTML API function for the given padid") p.add_argument("padid", help="the padid") - p.add_argument("--html", default=None, help="html, default: read from stdin") - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") + p.add_argument( + "--html", default=None, help="html, default: read from stdin" + ) + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: .etherdump/settings.json", + ) p.add_argument("--showurl", default=False, action="store_true") # p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") - p.add_argument("--create", default=False, action="store_true", help="flag to create pad if necessary") - p.add_argument("--limit", default=False, action="store_true", help="limit text to 100k (etherpad limit)") + p.add_argument( + "--create", + default=False, + action="store_true", + help="flag to create pad if necessary", + ) + p.add_argument( + "--limit", + default=False, + action="store_true", + help="limit text to 100k (etherpad limit)", + ) args = p.parse_args(args) with open(args.padinfo) as f: info = json.load(f) apiurl = info.get("apiurl") # apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info) -# data = {} -# data['apikey'] = info['apikey'] -# data['padID'] = args.padid # is utf-8 encoded + # data = {} + # data['apikey'] = info['apikey'] + # data['padID'] = args.padid # is utf-8 encoded createPad = False if args.create: # check if it's in fact necessary - requesturl = apiurl+'getRevisionsCount?'+urlencode({'apikey': info['apikey'], 'padID': args.padid}) + requesturl = ( + apiurl + + 'getRevisionsCount?' + + urlencode({'apikey': info['apikey'], 'padID': args.padid}) + ) results = json.load(urlopen(requesturl)) - print (json.dumps(results, indent=2), file=sys.stderr) + print(json.dumps(results, indent=2), file=sys.stderr) if results['code'] != 0: createPad = True @@ -47,21 +68,27 @@ def main(args): params['padID'] = args.padid if createPad: - requesturl = apiurl+'createPad' + requesturl = apiurl + 'createPad' if args.showurl: - print (requesturl) - results = requests.post(requesturl, params=params, data={'text': ''}) # json.load(urlopen(requesturl)) + print(requesturl) + results = requests.post( + requesturl, params=params, data={'text': ''} + ) # json.load(urlopen(requesturl)) results = json.loads(results.text) - print (json.dumps(results, indent=2)) + print(json.dumps(results, indent=2)) if len(html) > LIMIT_BYTES and args.limit: - print ("limiting", len(text), LIMIT_BYTES, file=sys.stderr) + print("limiting", len(text), LIMIT_BYTES, file=sys.stderr) html = html[:LIMIT_BYTES] - requesturl = apiurl+'setHTML' + requesturl = apiurl + 'setHTML' if args.showurl: - print (requesturl) + print(requesturl) # params['html'] = html - results = requests.post(requesturl, params={'apikey': info['apikey']}, data={'apikey': info['apikey'], 'padID': args.padid, 'html': html}) # json.load(urlopen(requesturl)) + results = requests.post( + requesturl, + params={'apikey': info['apikey']}, + data={'apikey': info['apikey'], 'padID': args.padid, 'html': html}, + ) # json.load(urlopen(requesturl)) results = json.loads(results.text) - print (json.dumps(results, indent=2)) + print(json.dumps(results, indent=2)) diff --git a/etherpump/commands/settext.py b/etherpump/commands/settext.py index b41f86f..8368e22 100644 --- a/etherpump/commands/settext.py +++ b/etherpump/commands/settext.py @@ -1,24 +1,39 @@ - +import json +import sys from argparse import ArgumentParser -import json, sys - -from urllib.parse import urlencode, quote -from urllib.request import urlopen, URLError, HTTPError +from urllib.parse import quote, urlencode +from urllib.request import HTTPError, URLError, urlopen import requests +LIMIT_BYTES = 100 * 1000 -LIMIT_BYTES = 100*1000 def main(args): p = ArgumentParser("calls the getText API function for the given padid") p.add_argument("padid", help="the padid") - p.add_argument("--text", default=None, help="text, default: read from stdin") - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") + p.add_argument( + "--text", default=None, help="text, default: read from stdin" + ) + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: .etherdump/settings.json", + ) p.add_argument("--showurl", default=False, action="store_true") # p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") - p.add_argument("--create", default=False, action="store_true", help="flag to create pad if necessary") - p.add_argument("--limit", default=False, action="store_true", help="limit text to 100k (etherpad limit)") + p.add_argument( + "--create", + default=False, + action="store_true", + help="flag to create pad if necessary", + ) + p.add_argument( + "--limit", + default=False, + action="store_true", + help="limit text to 100k (etherpad limit)", + ) args = p.parse_args(args) with open(args.padinfo) as f: @@ -27,11 +42,11 @@ def main(args): # apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info) data = {} data['apikey'] = info['apikey'] - data['padID'] = args.padid # is utf-8 encoded + data['padID'] = args.padid # is utf-8 encoded createPad = False if args.create: - requesturl = apiurl+'getRevisionsCount?'+urlencode(data) + requesturl = apiurl + 'getRevisionsCount?' + urlencode(data) results = json.load(urlopen(requesturl)) # print (json.dumps(results, indent=2)) if results['code'] != 0: @@ -43,20 +58,26 @@ def main(args): text = sys.stdin.read() if len(text) > LIMIT_BYTES and args.limit: - print ("limiting", len(text), LIMIT_BYTES) + print("limiting", len(text), LIMIT_BYTES) text = text[:LIMIT_BYTES] data['text'] = text if createPad: - requesturl = apiurl+'createPad' + requesturl = apiurl + 'createPad' else: - requesturl = apiurl+'setText' + requesturl = apiurl + 'setText' if args.showurl: - print (requesturl) - results = requests.post(requesturl, params=data) # json.load(urlopen(requesturl)) + print(requesturl) + results = requests.post( + requesturl, params=data + ) # json.load(urlopen(requesturl)) results = json.loads(results.text) if results['code'] != 0: - print ("setText: ERROR ({0}) on pad {1}: {2}".format(results['code'], args.padid, results['message'])) + print( + "setText: ERROR ({0}) on pad {1}: {2}".format( + results['code'], args.padid, results['message'] + ) + ) # json.dumps(results, indent=2) diff --git a/etherpump/commands/showmeta.py b/etherpump/commands/showmeta.py index a1c8149..db44205 100644 --- a/etherpump/commands/showmeta.py +++ b/etherpump/commands/showmeta.py @@ -1,17 +1,25 @@ - +import json +import re +import sys from argparse import ArgumentParser -import json, sys, re + from .common import * + """ Extract and output selected fields of metadata """ -def main (args): - p = ArgumentParser("extract & display meta data from a specific .meta.json file, or for a given padid (nb: it still looks for a .meta.json file)") + +def main(args): + p = ArgumentParser( + "extract & display meta data from a specific .meta.json file, or for a given padid (nb: it still looks for a .meta.json file)" + ) p.add_argument("--path", default=None, help="read from a meta.json file") p.add_argument("--padid", default=None, help="read meta for this padid") - p.add_argument("--format", default="{padid}", help="format str, default: {padid}") + p.add_argument( + "--format", default="{padid}", help="format str, default: {padid}" + ) args = p.parse_args(args) path = args.path @@ -19,7 +27,7 @@ def main (args): path = padpath(args.padid) + ".meta.json" if not path: - print ("Must specify either --path or --padid") + print("Must specify either --path or --padid") sys.exit(-1) with open(path) as f: @@ -27,5 +35,4 @@ def main (args): formatstr = args.format.decode("utf-8") formatstr = re.sub(r"{(\w+)}", r"{0[\1]}", formatstr) - print (formatstr.format(meta).encode("utf-8")) - + print(formatstr.format(meta).encode("utf-8")) diff --git a/etherpump/commands/status.py b/etherpump/commands/status.py index 82e4792..b08cda6 100644 --- a/etherpump/commands/status.py +++ b/etherpump/commands/status.py @@ -1,13 +1,17 @@ - +import json +import os +import re +import sys from argparse import ArgumentParser -import sys, json, re, os from datetime import datetime +from math import ceil, floor +from urllib.error import HTTPError, URLError from urllib.parse import urlencode from urllib.request import urlopen -from urllib.error import HTTPError, URLError -from math import ceil, floor + from .common import * + """ status (meta): Update meta data files for those that have changed. @@ -22,16 +26,18 @@ complicates the "syncing" idea.... """ -class PadItemException (Exception): + +class PadItemException(Exception): pass -class PadItem (): - def __init__ (self, padid=None, path=None, padexists=False): + +class PadItem: + def __init__(self, padid=None, path=None, padexists=False): self.padexists = padexists if padid and path: raise PadItemException("only give padid or path") if not (padid or path): - raise PadItemException("either padid or path must be specified") + raise PadItemException("either padid or path must be specified") if padid: self.padid = padid self.path = padpath(padid, group_path="g") @@ -40,7 +46,7 @@ class PadItem (): self.padid = padpath2id(path) @property - def status (self): + def status(self): if self.fileexists: if self.padexists: return "S" @@ -52,26 +58,77 @@ class PadItem (): return "?" @property - def fileexists (self): + def fileexists(self): return os.path.exists(self.path) -def ignore_p (path, settings=None): + +def ignore_p(path, settings=None): if path.startswith("."): return True -def main (args): - p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)") + +def main(args): + p = ArgumentParser( + "Check for pads that have changed since last sync (according to .meta.json)" + ) # p.add_argument("padid", nargs="*", default=[]) - p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") - p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)") - p.add_argument("--pub", default=".", help="folder to store files for public pads, default: pub") - p.add_argument("--group", default="g", help="folder to store files for group pads, default: g") - p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") - p.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False") - p.add_argument("--text", default=False, action="store_true", help="download text to PADID.txt, default: False") - p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False") - p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.dhtml, default: False") - p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False") + p.add_argument( + "--padinfo", + default=".etherpump/settings.json", + help="settings, default: .etherdump/settings.json", + ) + p.add_argument( + "--zerorevs", + default=False, + action="store_true", + help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)", + ) + p.add_argument( + "--pub", + default=".", + help="folder to store files for public pads, default: pub", + ) + p.add_argument( + "--group", + default="g", + help="folder to store files for group pads, default: g", + ) + p.add_argument( + "--skip", + default=None, + type=int, + help="skip this many items, default: None", + ) + p.add_argument( + "--meta", + default=False, + action="store_true", + help="download meta to PADID.meta.json, default: False", + ) + p.add_argument( + "--text", + default=False, + action="store_true", + help="download text to PADID.txt, default: False", + ) + p.add_argument( + "--html", + default=False, + action="store_true", + help="download html to PADID.html, default: False", + ) + p.add_argument( + "--dhtml", + default=False, + action="store_true", + help="download dhtml to PADID.dhtml, default: False", + ) + p.add_argument( + "--all", + default=False, + action="store_true", + help="download all files (meta, text, html, dhtml), default: False", + ) args = p.parse_args(args) info = loadpadinfo(args.padinfo) @@ -81,7 +138,9 @@ def main (args): padsbypath = {} # listAllPads - padids = getjson(info['apiurl']+'listAllPads?'+urlencode(data))['data']['padIDs'] + padids = getjson(info['apiurl'] + 'listAllPads?' + urlencode(data))['data'][ + 'padIDs' + ] padids.sort() for padid in padids: pad = PadItem(padid=padid, padexists=True) @@ -104,9 +163,9 @@ def main (args): if p.status != curstat: curstat = p.status if curstat == "F": - print ("New/changed files") + print("New/changed files") elif curstat == "P": - print ("New/changed pads") + print("New/changed pads") elif curstat == ".": - print ("Up to date") - print (" ", p.status, p.padid) + print("Up to date") + print(" ", p.status, p.padid) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..35b5a1b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,12 @@ +[build-system] +requires = [ + "setuptools>=41.0.0", + "setuptools-scm", + "wheel", +] +build-backend = "setuptools.build_meta" + +[tool.black] +line-length = 80 +target-version = ['py35', 'py36', 'py37'] +skip-string-normalization = true diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..35a33b9 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,9 @@ +[flake8] +max-line-length = 80 + +[isort] +known_first_party = etherpump +line_length = 80 +multi_line_output = 3 +include_trailing_comma = True +skip = .venv diff --git a/setup.py b/setup.py index c9bf0b6..ffb4ef8 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,9 @@ #!/usr/bin/env python3 -from etherpump import VERSION from setuptools import find_packages, setup +from etherpump import VERSION + with open('README.md', 'r') as handle: long_description = handle.read()