Add maintenance tools and run them

This commit is contained in:
Luke Murphy 2019-09-27 23:14:30 +02:00
parent 159165d2d5
commit 8f18594833
No known key found for this signature in database
GPG Key ID: 5E2EF5A63E3718CC
27 changed files with 1253 additions and 465 deletions

View File

@ -1,4 +1,13 @@
SOURCE_DIRS := bin/ etherpump/
publish: publish:
@rm -rf dist @rm -rf dist
@python setup.py bdist_wheel @python setup.py bdist_wheel
@twine upload dist/* @twine upload dist/*
format:
@black $(SOURCE_DIRS)
@isort -rc $(SOURCE_DIRS)
lint:
@flake8 $(SOURCE_DIRS)

View File

@ -126,6 +126,23 @@ Publishing
You should have a [PyPI](https://pypi.org/) account and be added as an owner/maintainer on the [etherpump package](https://pypi.org/project/etherpump/). You should have a [PyPI](https://pypi.org/) account and be added as an owner/maintainer on the [etherpump package](https://pypi.org/project/etherpump/).
Maintenance utilities
---------------------
Tools to help things stay tidy over time.
```bash
$ pip install flake8 isort black
$ make format
$ make lint
```
Please see the following links for further reading:
* http://flake8.pycqa.org
* https://isort.readthedocs.io
* https://black.readthedocs.io
License License
======= =======

View File

@ -1,8 +1,9 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from etherpump import VERSION
import sys import sys
from etherpump import VERSION
usage = """Usage: usage = """Usage:
etherpump CMD etherpump CMD
@ -43,7 +44,9 @@ except IndexError:
sys.exit(0) sys.exit(0)
try: try:
# http://stackoverflow.com/questions/301134/dynamic-module-import-in-python # http://stackoverflow.com/questions/301134/dynamic-module-import-in-python
cmdmod = __import__("etherpump.commands.%s" % cmd, fromlist=["etherdump.commands"]) cmdmod = __import__(
"etherpump.commands.%s" % cmd, fromlist=["etherdump.commands"]
)
cmdmod.main(args) cmdmod.main(args)
except ImportError as e: except ImportError as e:
print("Error performing command '{0}'\n(python said: {1})\n".format(cmd, e)) print("Error performing command '{0}'\n(python said: {1})\n".format(cmd, e))

View File

@ -1,4 +1,4 @@
import os import os
DATAPATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data") DATAPATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data")
VERSION = '0.0.2' VERSION = '0.0.2'

View File

@ -1,8 +1,10 @@
#!/usr/bin/env python #!/usr/bin/env python
import json
import os
from argparse import ArgumentParser from argparse import ArgumentParser
import json, os
def main(args): def main(args):
p = ArgumentParser("") p = ArgumentParser("")
@ -18,6 +20,6 @@ def main(args):
ret.append(meta) ret.append(meta)
if args.indent: if args.indent:
print (json.dumps(ret, indent=args.indent)) print(json.dumps(ret, indent=args.indent))
else: else:
print (json.dumps(ret)) print(json.dumps(ret))

View File

@ -1,24 +1,35 @@
import json
import re, os, json, sys import os
import re
import sys
from html.entities import name2codepoint
from math import ceil, floor from math import ceil, floor
from time import sleep from time import sleep
from urllib.parse import (
from urllib.parse import urlparse, urlunparse, urlencode, quote_plus, unquote_plus quote_plus,
from urllib.request import urlopen, URLError, HTTPError unquote_plus,
from html.entities import name2codepoint urlencode,
urlparse,
urlunparse,
)
from urllib.request import HTTPError, URLError, urlopen
groupnamepat = re.compile(r"^g\.(\w+)\$")


def splitpadname(padid):
    """Split a pad id into a ``(group, name)`` pair.

    Ids of the form ``g.<group>$<name>`` yield the group token and the text
    following the ``$``. Any other id yields an empty group and the id
    unchanged.
    """
    match = groupnamepat.match(padid)
    if match is None:
        # Not a group pad: no group component to strip.
        return ("", padid)
    return (match.group(1), padid[match.end():])
def padurl(padid):
    """Return the URL form of *padid*; currently the id itself, unchanged."""
    return padid
def padpath (padid, pub_path="", group_path="", normalize=False):
def padpath(padid, pub_path="", group_path="", normalize=False):
g, p = splitpadname(padid) g, p = splitpadname(padid)
p = quote_plus(p) p = quote_plus(p)
if normalize: if normalize:
@ -32,7 +43,8 @@ def padpath (padid, pub_path="", group_path="", normalize=False):
else: else:
return os.path.join(pub_path, p) return os.path.join(pub_path, p)
def padpath2id (path):
def padpath2id(path):
if type(path) == str: if type(path) == str:
path = path.encode("utf-8") path = path.encode("utf-8")
dd, p = os.path.split(path) dd, p = os.path.split(path)
@ -43,7 +55,8 @@ def padpath2id (path):
else: else:
return p.decode("utf-8") return p.decode("utf-8")
def getjson (url, max_retry=3, retry_sleep_time=3):
def getjson(url, max_retry=3, retry_sleep_time=3):
ret = {} ret = {}
ret["_retries"] = 0 ret["_retries"] = 0
while ret["_retries"] <= max_retry: while ret["_retries"] <= max_retry:
@ -61,13 +74,14 @@ def getjson (url, max_retry=3, retry_sleep_time=3):
except ValueError as e: except ValueError as e:
url = "http://localhost" + url url = "http://localhost" + url
except HTTPError as e: except HTTPError as e:
print ("HTTPError {0}".format(e), file=sys.stderr) print("HTTPError {0}".format(e), file=sys.stderr)
ret["_code"] = e.code ret["_code"] = e.code
ret["_retries"]+=1 ret["_retries"] += 1
if retry_sleep_time: if retry_sleep_time:
sleep(retry_sleep_time) sleep(retry_sleep_time)
return ret return ret
def loadpadinfo(p): def loadpadinfo(p):
with open(p) as f: with open(p) as f:
info = json.load(f) info = json.load(f)
@ -75,17 +89,17 @@ def loadpadinfo(p):
info['localapiurl'] = info.get('apiurl') info['localapiurl'] = info.get('apiurl')
return info return info
def progressbar(i, num, label="", file=sys.stderr):
    """Write a one-line, 20-character text progress bar to *file*.

    The line starts with a carriage return so that successive calls
    overwrite each other on a terminal.

    Args:
        i: zero-based index of the item being processed.
        num: total number of items.
        label: text shown after the ``i+1/num`` counter.
        file: writable text stream that receives the bar (default: stderr).
    """
    # Guard against an empty workload instead of raising ZeroDivisionError.
    fraction = float(i) / num if num else 0.0
    bars = int(ceil(fraction * 20))
    bar = ("*" * bars) + ("-" * (20 - bars))
    msg = "\r{0} {1}/{2} {3}... ".format(bar, (i + 1), num, label)
    # Honour the caller-supplied stream; it used to be ignored in favour of
    # a hard-coded sys.stderr.
    file.write(msg)
    file.flush()
# Python developer Fredrik Lundh (author of elementtree, among other things) has such a function on his website, which works with decimal, hex and named entities: # Python developer Fredrik Lundh (author of elementtree, among other things) has such a function on his website, which works with decimal, hex and named entities:
## ##
# Removes HTML or XML character references and entities from a text string. # Removes HTML or XML character references and entities from a text string.
@ -110,5 +124,6 @@ def unescape(text):
text = chr(name2codepoint[text[1:-1]]) text = chr(name2codepoint[text[1:-1]])
except KeyError: except KeyError:
pass pass
return text # leave as is return text # leave as is
return re.sub("&#?\w+;", fixup, text) return re.sub("&#?\w+;", fixup, text)

View File

@ -1,18 +1,29 @@
from argparse import ArgumentParser
import json import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode from urllib.parse import urlencode
from urllib.request import urlopen from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args): def main(args):
p = ArgumentParser("calls the createDiffHTML API function for the given padid") p = ArgumentParser(
"calls the createDiffHTML API function for the given padid"
)
p.add_argument("padid", help="the padid") p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true") p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") p.add_argument(
p.add_argument("--rev", type=int, default=None, help="revision, default: latest") "--format",
default="text",
help="output format, can be: text, json; default: text",
)
p.add_argument(
"--rev", type=int, default=None, help="revision, default: latest"
)
args = p.parse_args(args) args = p.parse_args(args)
with open(args.padinfo) as f: with open(args.padinfo) as f:
@ -25,15 +36,15 @@ def main(args):
data['startRev'] = "0" data['startRev'] = "0"
if args.rev != None: if args.rev != None:
data['rev'] = args.rev data['rev'] = args.rev
requesturl = apiurl+'createDiffHTML?'+urlencode(data) requesturl = apiurl + 'createDiffHTML?' + urlencode(data)
if args.showurl: if args.showurl:
print (requesturl) print(requesturl)
else: else:
try: try:
results = json.load(urlopen(requesturl))['data'] results = json.load(urlopen(requesturl))['data']
if args.format == "json": if args.format == "json":
print (json.dumps(results)) print(json.dumps(results))
else: else:
print (results['html'].encode("utf-8")) print(results['html'].encode("utf-8"))
except HTTPError as e: except HTTPError as e:
pass pass

View File

@ -1,17 +1,24 @@
from argparse import ArgumentParser
import json import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode from urllib.parse import urlencode
from urllib.request import urlopen from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args): def main(args):
p = ArgumentParser("calls the getText API function for the given padid") p = ArgumentParser("calls the getText API function for the given padid")
p.add_argument("padid", help="the padid") p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true") p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") p.add_argument(
"--format",
default="text",
help="output format, can be: text, json; default: text",
)
args = p.parse_args(args) args = p.parse_args(args)
with open(args.padinfo) as f: with open(args.padinfo) as f:
@ -20,14 +27,14 @@ def main(args):
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info) # apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
data = {} data = {}
data['apikey'] = info['apikey'] data['apikey'] = info['apikey']
data['padID'] = args.padid # is utf-8 encoded data['padID'] = args.padid # is utf-8 encoded
requesturl = apiurl+'deletePad?'+urlencode(data) requesturl = apiurl + 'deletePad?' + urlencode(data)
if args.showurl: if args.showurl:
print (requesturl) print(requesturl)
else: else:
results = json.load(urlopen(requesturl)) results = json.load(urlopen(requesturl))
if args.format == "json": if args.format == "json":
print (json.dumps(results)) print(json.dumps(results))
else: else:
if results['data']: if results['data']:
print (results['data']['text'].encode("utf-8")) print(results['data']['text'].encode("utf-8"))

View File

@ -1,12 +1,13 @@
import json
import re
import sys
from argparse import ArgumentParser from argparse import ArgumentParser
import sys, json, re from csv import writer
from datetime import datetime from datetime import datetime
from math import ceil, floor
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode from urllib.parse import urlencode
from urllib.request import urlopen from urllib.request import urlopen
from urllib.error import HTTPError, URLError
from csv import writer
from math import ceil, floor
""" """
Dumps a CSV of all pads with columns Dumps a CSV of all pads with columns
@ -23,16 +24,27 @@ groupnamepat = re.compile(r"^g\.(\w+)\$")
out = writer(sys.stdout) out = writer(sys.stdout)
def jsonload(url):
    """Fetch *url* and return its body parsed as JSON.

    Args:
        url: anything accepted by ``urllib.request.urlopen``.

    Returns:
        The decoded JSON value.

    Raises:
        Whatever ``urlopen`` raises for a failed request, and ``ValueError``
        (``json.JSONDecodeError``) when the body is not valid JSON.
    """
    # The context manager closes the response even when read() raises,
    # which the explicit f.close() did not guarantee.
    with urlopen(url) as response:
        data = response.read()
    return json.loads(data)
def main (args):
def main(args):
p = ArgumentParser("outputs a CSV of information all all pads") p = ArgumentParser("outputs a CSV of information all all pads")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") p.add_argument(
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False") "--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument(
"--zerorevs",
default=False,
action="store_true",
help="include pads with zero revisions, default: False",
)
args = p.parse_args(args) args = p.parse_args(args)
with open(args.padinfo) as f: with open(args.padinfo) as f:
@ -40,7 +52,7 @@ def main (args):
apiurl = info.get("apiurl") apiurl = info.get("apiurl")
data = {} data = {}
data['apikey'] = info['apikey'] data['apikey'] = info['apikey']
requesturl = apiurl+'listAllPads?'+urlencode(data) requesturl = apiurl + 'listAllPads?' + urlencode(data)
padids = jsonload(requesturl)['data']['padIDs'] padids = jsonload(requesturl)['data']['padIDs']
padids.sort() padids.sort()
@ -49,36 +61,50 @@ def main (args):
count = 0 count = 0
out.writerow(("padid", "groupid", "lastedited", "revisions", "author_ids")) out.writerow(("padid", "groupid", "lastedited", "revisions", "author_ids"))
for i, padid in enumerate(padids): for i, padid in enumerate(padids):
p = (float(i) / numpads) p = float(i) / numpads
percentage = int(floor(p*100)) percentage = int(floor(p * 100))
bars = int(ceil(p*20)) bars = int(ceil(p * 20))
bar = ("*"*bars) + ("-"*(20-bars)) bar = ("*" * bars) + ("-" * (20 - bars))
msg = "\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid) msg = "\r{0} {1}/{2} {3}... ".format(bar, (i + 1), numpads, padid)
if len(msg) > maxmsglen: if len(msg) > maxmsglen:
maxmsglen = len(msg) maxmsglen = len(msg)
sys.stderr.write("\r{0}".format(" "*maxmsglen)) sys.stderr.write("\r{0}".format(" " * maxmsglen))
sys.stderr.write(msg.encode("utf-8")) sys.stderr.write(msg.encode("utf-8"))
sys.stderr.flush() sys.stderr.flush()
m = groupnamepat.match(padid) m = groupnamepat.match(padid)
if m: if m:
groupname = m.group(1) groupname = m.group(1)
padidnogroup = padid[m.end():] padidnogroup = padid[m.end() :]
else: else:
groupname = "" groupname = ""
padidnogroup = padid padidnogroup = padid
data['padID'] = padid.encode("utf-8") data['padID'] = padid.encode("utf-8")
revisions = jsonload(apiurl+'getRevisionsCount?'+urlencode(data))['data']['revisions'] revisions = jsonload(apiurl + 'getRevisionsCount?' + urlencode(data))[
'data'
]['revisions']
if (revisions == 0) and not args.zerorevs: if (revisions == 0) and not args.zerorevs:
continue continue
lastedited_raw = jsonload(apiurl + 'getLastEdited?' + urlencode(data))[
lastedited_raw = jsonload(apiurl+'getLastEdited?'+urlencode(data))['data']['lastEdited'] 'data'
lastedited_iso = datetime.fromtimestamp(int(lastedited_raw)/1000).isoformat() ]['lastEdited']
author_ids = jsonload(apiurl+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs'] lastedited_iso = datetime.fromtimestamp(
int(lastedited_raw) / 1000
).isoformat()
author_ids = jsonload(apiurl + 'listAuthorsOfPad?' + urlencode(data))[
'data'
]['authorIDs']
author_ids = " ".join(author_ids).encode("utf-8") author_ids = " ".join(author_ids).encode("utf-8")
out.writerow((padidnogroup.encode("utf-8"), groupname.encode("utf-8"), revisions, lastedited_iso, author_ids)) out.writerow(
(
padidnogroup.encode("utf-8"),
groupname.encode("utf-8"),
revisions,
lastedited_iso,
author_ids,
)
)
count += 1 count += 1
print("\nWrote {0} rows...".format(count), file=sys.stderr) print("\nWrote {0} rows...".format(count), file=sys.stderr)

View File

@ -1,18 +1,27 @@
from argparse import ArgumentParser
import json import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode from urllib.parse import urlencode
from urllib.request import urlopen from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args): def main(args):
p = ArgumentParser("calls the getHTML API function for the given padid") p = ArgumentParser("calls the getHTML API function for the given padid")
p.add_argument("padid", help="the padid") p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true") p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") p.add_argument(
p.add_argument("--rev", type=int, default=None, help="revision, default: latest") "--format",
default="text",
help="output format, can be: text, json; default: text",
)
p.add_argument(
"--rev", type=int, default=None, help="revision, default: latest"
)
args = p.parse_args(args) args = p.parse_args(args)
with open(args.padinfo) as f: with open(args.padinfo) as f:
@ -24,12 +33,12 @@ def main(args):
data['padID'] = args.padid data['padID'] = args.padid
if args.rev != None: if args.rev != None:
data['rev'] = args.rev data['rev'] = args.rev
requesturl = apiurl+'getHTML?'+urlencode(data) requesturl = apiurl + 'getHTML?' + urlencode(data)
if args.showurl: if args.showurl:
print (requesturl) print(requesturl)
else: else:
results = json.load(urlopen(requesturl))['data'] results = json.load(urlopen(requesturl))['data']
if args.format == "json": if args.format == "json":
print (json.dumps(results)) print(json.dumps(results))
else: else:
print (results['html'].encode("utf-8")) print(results['html'].encode("utf-8"))

View File

@ -1,17 +1,27 @@
import json
import sys
from argparse import ArgumentParser from argparse import ArgumentParser
import json, sys
from urllib.parse import urlencode from urllib.parse import urlencode
from urllib.request import urlopen, URLError, HTTPError from urllib.request import HTTPError, URLError, urlopen
def main(args): def main(args):
p = ArgumentParser("calls the getText API function for the given padid") p = ArgumentParser("calls the getText API function for the given padid")
p.add_argument("padid", help="the padid") p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true") p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") p.add_argument(
p.add_argument("--rev", type=int, default=None, help="revision, default: latest") "--format",
default="text",
help="output format, can be: text, json; default: text",
)
p.add_argument(
"--rev", type=int, default=None, help="revision, default: latest"
)
args = p.parse_args(args) args = p.parse_args(args)
with open(args.padinfo) as f: with open(args.padinfo) as f:
@ -20,18 +30,18 @@ def main(args):
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info) # apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
data = {} data = {}
data['apikey'] = info['apikey'] data['apikey'] = info['apikey']
data['padID'] = args.padid # is utf-8 encoded data['padID'] = args.padid # is utf-8 encoded
if args.rev != None: if args.rev != None:
data['rev'] = args.rev data['rev'] = args.rev
requesturl = apiurl+'getText?'+urlencode(data) requesturl = apiurl + 'getText?' + urlencode(data)
if args.showurl: if args.showurl:
print (requesturl) print(requesturl)
else: else:
resp = urlopen(requesturl).read() resp = urlopen(requesturl).read()
resp = resp.decode("utf-8") resp = resp.decode("utf-8")
results = json.loads(resp) results = json.loads(resp)
if args.format == "json": if args.format == "json":
print (json.dumps(results)) print(json.dumps(results))
else: else:
if results['data']: if results['data']:
sys.stdout.write(results['data']['text']) sys.stdout.write(results['data']['text'])

View File

@ -1,28 +1,31 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from html5lib import parse import os
import os, sys import sys
from argparse import ArgumentParser from argparse import ArgumentParser
from xml.etree import ElementTree as ET from xml.etree import ElementTree as ET
from html5lib import parse
def etree_indent(elem, level=0): def etree_indent(elem, level=0):
i = "\n" + level*" " i = "\n" + level * " "
if len(elem): if len(elem):
if not elem.text or not elem.text.strip(): if not elem.text or not elem.text.strip():
elem.text = i + " " elem.text = i + " "
if not elem.tail or not elem.tail.strip(): if not elem.tail or not elem.tail.strip():
elem.tail = i elem.tail = i
for elem in elem: for elem in elem:
etree_indent(elem, level+1) etree_indent(elem, level + 1)
if not elem.tail or not elem.tail.strip(): if not elem.tail or not elem.tail.strip():
elem.tail = i elem.tail = i
else: else:
if level and (not elem.tail or not elem.tail.strip()): if level and (not elem.tail or not elem.tail.strip()):
elem.tail = i elem.tail = i
def get_link_type (url):
def get_link_type(url):
lurl = url.lower() lurl = url.lower()
if lurl.endswith(".html") or lurl.endswith(".htm"): if lurl.endswith(".html") or lurl.endswith(".htm"):
return "text/html" return "text/html"
@ -37,13 +40,17 @@ def get_link_type (url):
elif lurl.endswith(".js") or lurl.endswith(".jsonp"): elif lurl.endswith(".js") or lurl.endswith(".jsonp"):
return "text/javascript" return "text/javascript"
def pluralize(x):
    """Return *x* as a sequence of items.

    Lists and tuples (including their subclasses, e.g. namedtuples) are
    returned unchanged; any other value is wrapped in a one-element tuple.
    """
    # isinstance also accepts subclasses, which the exact type() comparison
    # wrongly wrapped a second time.
    if isinstance(x, (list, tuple)):
        return x
    return (x,)
def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, indent=False):
def html5tidy(
doc, charset="utf-8", title=None, scripts=None, links=None, indent=False
):
if scripts: if scripts:
script_srcs = [x.attrib.get("src") for x in doc.findall(".//script")] script_srcs = [x.attrib.get("src") for x in doc.findall(".//script")]
for src in pluralize(scripts): for src in pluralize(scripts):
@ -56,21 +63,30 @@ def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, inden
for elt in doc.findall(".//link"): for elt in doc.findall(".//link"):
href = elt.attrib.get("href") href = elt.attrib.get("href")
if href: if href:
existinglinks[href] = elt existinglinks[href] = elt
for link in links: for link in links:
linktype = link.get("type") or get_link_type(link["href"]) linktype = link.get("type") or get_link_type(link["href"])
if link["href"] in existinglinks: if link["href"] in existinglinks:
elt = existinglinks[link["href"]] elt = existinglinks[link["href"]]
elt.attrib["rel"] = link["rel"] elt.attrib["rel"] = link["rel"]
else: else:
elt = ET.SubElement(doc.find(".//head"), "link", href=link["href"], rel=link["rel"]) elt = ET.SubElement(
doc.find(".//head"),
"link",
href=link["href"],
rel=link["rel"],
)
if linktype: if linktype:
elt.attrib["type"] = linktype elt.attrib["type"] = linktype
if "title" in link: if "title" in link:
elt.attrib["title"] = link["title"] elt.attrib["title"] = link["title"]
if charset: if charset:
meta_charsets = [x.attrib.get("charset") for x in doc.findall(".//meta") if x.attrib.get("charset") != None] meta_charsets = [
x.attrib.get("charset")
for x in doc.findall(".//meta")
if x.attrib.get("charset") != None
]
if not meta_charsets: if not meta_charsets:
meta = ET.SubElement(doc.find(".//head"), "meta", charset=charset) meta = ET.SubElement(doc.find(".//head"), "meta", charset=charset)
@ -79,33 +95,89 @@ def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, inden
if not titleelt: if not titleelt:
titleelt = ET.SubElement(doc.find(".//head"), "title") titleelt = ET.SubElement(doc.find(".//head"), "title")
titleelt.text = title titleelt.text = title
if indent: if indent:
etree_indent(doc) etree_indent(doc)
return doc return doc
def main (args):
def main(args):
p = ArgumentParser("") p = ArgumentParser("")
p.add_argument("input", nargs="?", default=None) p.add_argument("input", nargs="?", default=None)
p.add_argument("--indent", default=False, action="store_true") p.add_argument("--indent", default=False, action="store_true")
p.add_argument("--mogrify", default=False, action="store_true", help="modify file in place") p.add_argument(
p.add_argument("--method", default="html", help="method, default: html, values: html, xml, text") "--mogrify",
default=False,
action="store_true",
help="modify file in place",
)
p.add_argument(
"--method",
default="html",
help="method, default: html, values: html, xml, text",
)
p.add_argument("--output", default=None, help="") p.add_argument("--output", default=None, help="")
p.add_argument("--title", default=None, help="ensure/add title tag in head") p.add_argument("--title", default=None, help="ensure/add title tag in head")
p.add_argument("--charset", default="utf-8", help="ensure/add meta tag with charset") p.add_argument(
p.add_argument("--script", action="append", default=[], help="ensure/add script tag") "--charset", default="utf-8", help="ensure/add meta tag with charset"
)
p.add_argument(
"--script", action="append", default=[], help="ensure/add script tag"
)
# <link>s, see https://www.w3.org/TR/html5/links.html#links # <link>s, see https://www.w3.org/TR/html5/links.html#links
p.add_argument("--stylesheet", action="append", default=[], help="ensure/add style link") p.add_argument(
p.add_argument("--alternate", action="append", default=[], nargs="+", help="ensure/add alternate links (optionally followed by a title and type)") "--stylesheet",
p.add_argument("--next", action="append", default=[], nargs="+", help="ensure/add alternate link") action="append",
p.add_argument("--prev", action="append", default=[], nargs="+", help="ensure/add alternate link") default=[],
p.add_argument("--search", action="append", default=[], nargs="+", help="ensure/add search link") help="ensure/add style link",
p.add_argument("--rss", action="append", default=[], nargs="+", help="ensure/add alternate link of type application/rss+xml") )
p.add_argument("--atom", action="append", default=[], nargs="+", help="ensure/add alternate link of type application/atom+xml") p.add_argument(
"--alternate",
action="append",
default=[],
nargs="+",
help="ensure/add alternate links (optionally followed by a title and type)",
)
p.add_argument(
"--next",
action="append",
default=[],
nargs="+",
help="ensure/add alternate link",
)
p.add_argument(
"--prev",
action="append",
default=[],
nargs="+",
help="ensure/add alternate link",
)
p.add_argument(
"--search",
action="append",
default=[],
nargs="+",
help="ensure/add search link",
)
p.add_argument(
"--rss",
action="append",
default=[],
nargs="+",
help="ensure/add alternate link of type application/rss+xml",
)
p.add_argument(
"--atom",
action="append",
default=[],
nargs="+",
help="ensure/add alternate link of type application/atom+xml",
)
args = p.parse_args(args) args = p.parse_args(args)
links = [] links = []
def add_links (links, items, rel, _type=None):
def add_links(links, items, rel, _type=None):
for href in items: for href in items:
d = {} d = {}
d["rel"] = rel d["rel"] = rel
@ -128,6 +200,7 @@ def main (args):
d["href"] = href d["href"] = href
links.append(d) links.append(d)
for rel in ("stylesheet", "alternate", "next", "prev", "search"): for rel in ("stylesheet", "alternate", "next", "prev", "search"):
add_links(links, getattr(args, rel), rel) add_links(links, getattr(args, rel), rel)
for item in args.rss: for item in args.rss:
@ -144,27 +217,33 @@ def main (args):
doc = parse(fin, treebuilder="etree", namespaceHTMLElements=False) doc = parse(fin, treebuilder="etree", namespaceHTMLElements=False)
if fin != sys.stdin: if fin != sys.stdin:
fin.close() fin.close()
html5tidy(doc, scripts=args.script, links=links, title=args.title, indent=args.indent) html5tidy(
doc,
scripts=args.script,
links=links,
title=args.title,
indent=args.indent,
)
# OUTPUT # OUTPUT
tmppath = None tmppath = None
if args.output: if args.output:
fout = open(args.output, "w") fout = open(args.output, "w")
elif args.mogrify: elif args.mogrify:
tmppath = args.input+".tmp" tmppath = args.input + ".tmp"
fout = open(tmppath, "w") fout = open(tmppath, "w")
else: else:
fout = sys.stdout fout = sys.stdout
print (ET.tostring(doc, method=args.method, encoding="unicode"), file=fout) print(ET.tostring(doc, method=args.method, encoding="unicode"), file=fout)
if fout != sys.stdout: if fout != sys.stdout:
fout.close() fout.close()
if tmppath: if tmppath:
os.rename(args.input, args.input+"~") os.rename(args.input, args.input + "~")
os.rename(tmppath, args.input) os.rename(tmppath, args.input)
if __name__ == "__main__": if __name__ == "__main__":
main(sys.argv) main(sys.argv)

View File

@ -1,16 +1,19 @@
import json
import os
import re
import sys
import time
from argparse import ArgumentParser from argparse import ArgumentParser
import sys, json, re, os, time
from datetime import datetime from datetime import datetime
import dateutil.parser
from urllib.parse import urlparse, urlunparse, urlencode, quote
from urllib.request import urlopen, URLError, HTTPError
from jinja2 import FileSystemLoader, Environment
from etherpump.commands.common import *
from time import sleep from time import sleep
from urllib.parse import quote, urlencode, urlparse, urlunparse
from urllib.request import HTTPError, URLError, urlopen
from jinja2 import Environment, FileSystemLoader
import dateutil.parser import dateutil.parser
from etherpump.commands.common import *
""" """
index: index:
@ -20,7 +23,8 @@ index:
""" """
def group (items, key=lambda x: x):
def group(items, key=lambda x: x):
""" returns a list of lists, of items grouped by a key function """ """ returns a list of lists, of items grouped by a key function """
ret = [] ret = []
keys = {} keys = {}
@ -34,10 +38,12 @@ def group (items, key=lambda x: x):
ret.append(keys[k]) ret.append(keys[k])
return ret return ret
# def base (x): # def base (x):
# return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x) # return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
def splitextlong (x):
def splitextlong(x):
""" split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz') """ """ split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz') """
m = re.search(r"^(.*?)(\..*)$", x) m = re.search(r"^(.*?)(\..*)$", x)
if m: if m:
@ -45,20 +51,24 @@ def splitextlong (x):
else: else:
return x, '' return x, ''
def base(x):
    """Return the first element of ``splitextlong(x)``, i.e. *x* without its "long" extension."""
    parts = splitextlong(x)
    return parts[0]
def excerpt(t, chars=25):
    """Return *t* cut to at most *chars* characters.

    When truncation happens, ``"..."`` is appended to the kept prefix;
    shorter input is returned unchanged.
    """
    return t if len(t) <= chars else t[:chars] + "..."
def absurl(url, base=None):
    """Return *url* made absolute by prefixing *base* when needed.

    A url that already starts with ``"http"`` is returned as is. Otherwise
    *base* is prepended by plain string concatenation (no path
    normalisation). When no *base* is given the url is returned unchanged
    instead of failing on ``None + str``.
    """
    if url.startswith("http") or base is None:
        return url
    return base + url
def url_base (url):
def url_base(url):
(scheme, netloc, path, params, query, fragment) = urlparse(url) (scheme, netloc, path, params, query, fragment) = urlparse(url)
path, _ = os.path.split(path.lstrip("/")) path, _ = os.path.split(path.lstrip("/"))
ret = urlunparse((scheme, netloc, path, None, None, None)) ret = urlunparse((scheme, netloc, path, None, None, None))
@ -66,45 +76,131 @@ def url_base (url):
ret += "/" ret += "/"
return ret return ret
def datetimeformat(t, format='%Y-%m-%d %H:%M:%S'):
    """Format a timestamp with the given ``strftime`` pattern.

    Args:
        t: either a date/time string, parsed with ``dateutil.parser.parse``,
            or a value accepted by ``time.localtime`` (seconds since the
            epoch), rendered in local time.
        format: ``strftime`` format string.

    Returns:
        The formatted date/time string.
    """
    # isinstance also routes str subclasses to the parser; the exact type()
    # comparison sent them to time.localtime, which rejects strings.
    if isinstance(t, str):
        return dateutil.parser.parse(t).strftime(format)
    return time.strftime(format, time.localtime(t))
def main (args):
def main(args):
p = ArgumentParser("Convert dumped files to a document via a template.") p = ArgumentParser("Convert dumped files to a document via a template.")
p.add_argument("input", nargs="+", help="Files to list (.meta.json files)") p.add_argument("input", nargs="+", help="Files to list (.meta.json files)")
p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in") p.add_argument(
p.add_argument("--template", default="index.html", help="template name, built-ins include index.html, rss.xml; default: index.html") "--templatepath",
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: ./.etherdump/settings.json") default=None,
help="path to find templates, default: built-in",
)
p.add_argument(
"--template",
default="index.html",
help="template name, built-ins include index.html, rss.xml; default: index.html",
)
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: ./.etherdump/settings.json",
)
# p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)") # p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
p.add_argument("--order", default="padid", help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid") p.add_argument(
p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)") "--order",
p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)") default="padid",
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid",
)
p.add_argument(
"--reverse",
default=False,
action="store_true",
help="reverse order, default: False (reverse chrono)",
)
p.add_argument(
"--limit",
type=int,
default=0,
help="limit to number of items, default: 0 (no limit)",
)
p.add_argument(
"--skip",
default=None,
type=int,
help="skip this many items, default: None",
)
p.add_argument("--content", default=False, action="store_true", help="rss: include (full) content tag, default: False") p.add_argument(
p.add_argument("--link", default="diffhtml,html,text", help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text") "--content",
p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl") default=False,
action="store_true",
help="rss: include (full) content tag, default: False",
)
p.add_argument(
"--link",
default="diffhtml,html,text",
help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text",
)
p.add_argument(
"--linkbase",
default=None,
help="base url to use for links, default: try to use the feedurl",
)
p.add_argument("--output", default=None, help="output, default: stdout") p.add_argument("--output", default=None, help="output, default: stdout")
p.add_argument("--files", default=False, action="store_true", help="include files (experimental)") p.add_argument(
"--files",
default=False,
action="store_true",
help="include files (experimental)",
)
pg = p.add_argument_group('template variables') pg = p.add_argument_group('template variables')
pg.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml") pg.add_argument(
pg.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url") "--feedurl",
pg.add_argument("--title", default="etherpump", help="title for document or rss feed channel title, default: etherdump") default="feed.xml",
pg.add_argument("--description", default="", help="rss: channel description, default: empty") help="rss: to use as feeds own (self) link, default: feed.xml",
pg.add_argument("--language", default="en-US", help="rss: feed language, default: en-US") )
pg.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily") pg.add_argument(
pg.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1") "--siteurl",
pg.add_argument("--generator", default="https://gitlab.com/activearchives/etherpump", help="generator, default: https://gitlab.com/activearchives/etherdump") default=None,
pg.add_argument("--timestamp", default=None, help="timestamp, default: now (e.g. 2015-12-01 12:30:00)") help="rss: to use as channel's site link, default: the etherpad url",
)
pg.add_argument(
"--title",
default="etherpump",
help="title for document or rss feed channel title, default: etherdump",
)
pg.add_argument(
"--description",
default="",
help="rss: channel description, default: empty",
)
pg.add_argument(
"--language", default="en-US", help="rss: feed language, default: en-US"
)
pg.add_argument(
"--updatePeriod",
default="daily",
help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily",
)
pg.add_argument(
"--updateFrequency",
default=1,
type=int,
help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1",
)
pg.add_argument(
"--generator",
default="https://gitlab.com/activearchives/etherpump",
help="generator, default: https://gitlab.com/activearchives/etherdump",
)
pg.add_argument(
"--timestamp",
default=None,
help="timestamp, default: now (e.g. 2015-12-01 12:30:00)",
)
pg.add_argument("--next", default=None, help="next link, default: None)") pg.add_argument("--next", default=None, help="next link, default: None)")
pg.add_argument("--prev", default=None, help="prev link, default: None") pg.add_argument("--prev", default=None, help="prev link, default: None")
@ -129,17 +225,12 @@ def main (args):
# Use "base" to strip (longest) extensions # Use "base" to strip (longest) extensions
# inputs = group(inputs, base) # inputs = group(inputs, base)
def wrappath(p):
    """ describes the local file p as a version record (url, path, code, type) """
    location = "./{0}".format(p)
    _, suffix = os.path.splitext(p)
    return {
        "url": location,
        "path": location,
        "code": 200,
        "type": suffix[1:],  # extension without its leading dot
    }
def metaforpaths (paths): def metaforpaths(paths):
ret = {} ret = {}
pid = base(paths[0]) pid = base(paths[0])
ret['pad'] = ret['padid'] = pid ret['pad'] = ret['padid'] = pid
@ -149,7 +240,9 @@ def main (args):
mtime = os.stat(p).st_mtime mtime = os.stat(p).st_mtime
if lastedited == None or mtime > lastedited: if lastedited == None or mtime > lastedited:
lastedited = mtime lastedited = mtime
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime("%Y-%m-%dT%H:%M:%S") ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime(
"%Y-%m-%dT%H:%M:%S"
)
ret["lastedited_raw"] = mtime ret["lastedited_raw"] = mtime
return ret return ret
@ -169,7 +262,7 @@ def main (args):
# else: # else:
# return metaforpaths(paths) # return metaforpaths(paths)
def fixdates (padmeta): def fixdates(padmeta):
d = dateutil.parser.parse(padmeta["lastedited_iso"]) d = dateutil.parser.parse(padmeta["lastedited_iso"])
padmeta["lastedited"] = d padmeta["lastedited"] = d
padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000") padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000")
@ -180,17 +273,21 @@ def main (args):
pads = list(map(fixdates, pads)) pads = list(map(fixdates, pads))
args.pads = list(pads) args.pads = list(pads)
def could_have_base(x, y):
    """ tells whether x is y itself, or y followed by a dotted extension """
    if x == y:
        return True
    if not x.startswith(y):
        return False
    remainder = x[len(y):]
    return remainder.startswith(".")
def get_best_pad(x):
    """ returns the pad of the first base in padbases that x could belong to, else None """
    for candidate in padbases:
        pad = pads_by_base[candidate]
        if could_have_base(x, candidate):
            return pad
    return None
def has_version(padinfo, path):
    """ lists the entries of padinfo['versions'] whose 'path' is "./" + path

    The result is an empty (falsy) list when no version has that path.
    """
    found = []
    for version in padinfo['versions']:
        if 'path' in version and version['path'] == "./" + path:
            found.append(version)
    return found
if args.files: if args.files:
inputs = args.input inputs = args.input
@ -208,25 +305,33 @@ def main (args):
# print ("PADBASES", file=sys.stderr) # print ("PADBASES", file=sys.stderr)
# for pb in padbases: # for pb in padbases:
# print (" ", pb, file=sys.stderr) # print (" ", pb, file=sys.stderr)
print ("pairing input files with pads", file=sys.stderr) print("pairing input files with pads", file=sys.stderr)
for x in inputs: for x in inputs:
# pair input with a pad if possible # pair input with a pad if possible
xbasename = os.path.basename(x) xbasename = os.path.basename(x)
p = get_best_pad(xbasename) p = get_best_pad(xbasename)
if p: if p:
if not has_version(p, x): if not has_version(p, x):
print ("Grouping file {0} with pad {1}".format(x, p['padid']), file=sys.stderr) print(
"Grouping file {0} with pad {1}".format(x, p['padid']),
file=sys.stderr,
)
p['versions'].append(wrappath(x)) p['versions'].append(wrappath(x))
else: else:
print ("Skipping existing version {0} ({1})...".format(x, p['padid']), file=sys.stderr) print(
"Skipping existing version {0} ({1})...".format(
x, p['padid']
),
file=sys.stderr,
)
removelist.append(x) removelist.append(x)
# Removed Matches files # Removed Matches files
for x in removelist: for x in removelist:
inputs.remove(x) inputs.remove(x)
print ("Remaining files:", file=sys.stderr) print("Remaining files:", file=sys.stderr)
for x in inputs: for x in inputs:
print (x, file=sys.stderr) print(x, file=sys.stderr)
print (file=sys.stderr) print(file=sys.stderr)
# Add "fake" pads for remaining files # Add "fake" pads for remaining files
for x in inputs: for x in inputs:
args.pads.append(metaforpaths([x])) args.pads.append(metaforpaths([x]))
@ -242,7 +347,9 @@ def main (args):
# order items & apply limit # order items & apply limit
if args.order == "lastedited": if args.order == "lastedited":
args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=args.reverse) args.pads.sort(
key=lambda x: x.get("lastedited_iso"), reverse=args.reverse
)
elif args.order == "pad": elif args.order == "pad":
args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse) args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse)
elif args.order == "padid": elif args.order == "padid":
@ -250,12 +357,14 @@ def main (args):
elif args.order == "revisions": elif args.order == "revisions":
args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse) args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse)
elif args.order == "authors": elif args.order == "authors":
args.pads.sort(key=lambda x: len(x.get("authors")), reverse=args.reverse) args.pads.sort(
key=lambda x: len(x.get("authors")), reverse=args.reverse
)
else: else:
raise Exception("That ordering is not implemented!") raise Exception("That ordering is not implemented!")
if args.limit: if args.limit:
args.pads = args.pads[:args.limit] args.pads = args.pads[: args.limit]
# add versions_by_type, add in full text # add versions_by_type, add in full text
# add link (based on args.link) # add link (based on args.link)
@ -272,7 +381,7 @@ def main (args):
if "text" in versions_by_type: if "text" in versions_by_type:
try: try:
with open (versions_by_type["text"]["path"]) as f: with open(versions_by_type["text"]["path"]) as f:
p["text"] = f.read() p["text"] = f.read()
except FileNotFoundError: except FileNotFoundError:
p['text'] = '' p['text'] = ''
@ -289,6 +398,6 @@ def main (args):
if args.output: if args.output:
with open(args.output, "w") as f: with open(args.output, "w") as f:
print (template.render(vars(args)), file=f) print(template.render(vars(args)), file=f)
else: else:
print (template.render(vars(args))) print(template.render(vars(args)))

View File

@ -1,19 +1,19 @@
import json
import os
import sys
from argparse import ArgumentParser from argparse import ArgumentParser
from urllib.parse import urlencode, urlparse, urlunparse
from urllib.request import HTTPError, URLError, urlopen
from urllib.parse import urlparse, urlunparse, urlencode
from urllib.request import urlopen, URLError, HTTPError
import json, os, sys
def get_api(url, cmd=None, data=None, verbose=False): def get_api(url, cmd=None, data=None, verbose=False):
try: try:
useurl = url+cmd useurl = url + cmd
if data: if data:
useurl += "?"+urlencode(data) useurl += "?" + urlencode(data)
# data['apikey'] = "7c8faa070c97f83d8f705c935a32d5141f89cbaa2158042fa92e8ddad5dbc5e1" # data['apikey'] = "7c8faa070c97f83d8f705c935a32d5141f89cbaa2158042fa92e8ddad5dbc5e1"
if verbose: if verbose:
print ("trying", useurl, file=sys.stderr) print("trying", useurl, file=sys.stderr)
resp = urlopen(useurl).read() resp = urlopen(useurl).read()
resp = resp.decode("utf-8") resp = resp.decode("utf-8")
resp = json.loads(resp) resp = json.loads(resp)
@ -21,11 +21,11 @@ def get_api(url, cmd=None, data=None, verbose=False):
return resp return resp
except ValueError as e: except ValueError as e:
if verbose: if verbose:
print (" ValueError", e, file=sys.stderr) print(" ValueError", e, file=sys.stderr)
return return
except HTTPError as e: except HTTPError as e:
if verbose: if verbose:
print (" HTTPError", e, file=sys.stderr) print(" HTTPError", e, file=sys.stderr)
if e.code == 401: if e.code == 401:
# Unauthorized is how the API responds to an incorrect API key # Unauthorized is how the API responds to an incorrect API key
return {"code": 401, "message": e} return {"code": 401, "message": e}
@ -34,7 +34,8 @@ def get_api(url, cmd=None, data=None, verbose=False):
# # print ("returning", resp, file=sys.stderr) # # print ("returning", resp, file=sys.stderr)
# return resp # return resp
def tryapiurl (url, verbose=False):
def tryapiurl(url, verbose=False):
""" """
Try to use url as api, correcting if possible. Try to use url as api, correcting if possible.
Returns corrected / normalized URL, or None if not possible Returns corrected / normalized URL, or None if not possible
@ -47,22 +48,30 @@ def tryapiurl (url, verbose=False):
params, query, fragment = ("", "", "") params, query, fragment = ("", "", "")
path = path.strip("/") path = path.strip("/")
# 1. try directly... # 1. try directly...
apiurl = urlunparse((scheme, netloc, path, params, query, fragment))+"/" apiurl = (
urlunparse((scheme, netloc, path, params, query, fragment)) + "/"
)
if get_api(apiurl, "listAllPads", verbose=verbose): if get_api(apiurl, "listAllPads", verbose=verbose):
return apiurl return apiurl
# 2. try with += api/1.2.9 # 2. try with += api/1.2.9
path = os.path.join(path, "api", "1.2.9")+"/" path = os.path.join(path, "api", "1.2.9") + "/"
apiurl = urlunparse((scheme, netloc, path, params, query, fragment)) apiurl = urlunparse((scheme, netloc, path, params, query, fragment))
if get_api(apiurl, "listAllPads", verbose=verbose): if get_api(apiurl, "listAllPads", verbose=verbose):
return apiurl return apiurl
# except ValueError as e: # except ValueError as e:
# print ("ValueError", e, file=sys.stderr) # print ("ValueError", e, file=sys.stderr)
except URLError as e: except URLError as e:
print ("URLError", e, file=sys.stderr) print("URLError", e, file=sys.stderr)
def main(args): def main(args):
p = ArgumentParser("initialize an etherpump folder") p = ArgumentParser("initialize an etherpump folder")
p.add_argument("arg", nargs="*", default=[], help="optional positional args: path etherpadurl") p.add_argument(
"arg",
nargs="*",
default=[],
help="optional positional args: path etherpadurl",
)
p.add_argument("--path", default=None, help="path to initialize") p.add_argument("--path", default=None, help="path to initialize")
p.add_argument("--padurl", default=None, help="") p.add_argument("--padurl", default=None, help="")
p.add_argument("--apikey", default=None, help="") p.add_argument("--apikey", default=None, help="")
@ -70,7 +79,6 @@ def main(args):
p.add_argument("--reinit", default=False, action="store_true", help="") p.add_argument("--reinit", default=False, action="store_true", help="")
args = p.parse_args(args) args = p.parse_args(args)
path = args.path path = args.path
if path == None and len(args.arg): if path == None and len(args.arg):
path = args.arg[0] path = args.arg[0]
@ -89,7 +97,9 @@ def main(args):
with open(padinfopath) as f: with open(padinfopath) as f:
padinfo = json.load(f) padinfo = json.load(f)
if not args.reinit: if not args.reinit:
print ("Folder is already initialized. Use --reinit to reset settings.") print(
"Folder is already initialized. Use --reinit to reset settings."
)
sys.exit(0) sys.exit(0)
except IOError: except IOError:
pass pass
@ -100,7 +110,7 @@ def main(args):
apiurl = args.padurl apiurl = args.padurl
while True: while True:
if apiurl: if apiurl:
apiurl = tryapiurl(apiurl,verbose=args.verbose) apiurl = tryapiurl(apiurl, verbose=args.verbose)
if apiurl: if apiurl:
# print ("Got APIURL: {0}".format(apiurl)) # print ("Got APIURL: {0}".format(apiurl))
break break
@ -109,13 +119,18 @@ def main(args):
apikey = args.apikey apikey = args.apikey
while True: while True:
if apikey: if apikey:
resp = get_api(apiurl, "listAllPads", {"apikey": apikey}, verbose=args.verbose) resp = get_api(
apiurl, "listAllPads", {"apikey": apikey}, verbose=args.verbose
)
if resp and resp["code"] == 0: if resp and resp["code"] == 0:
# print ("GOOD") # print ("GOOD")
break break
else: else:
print ("bad") print("bad")
print ("The APIKEY is the contents of the file APIKEY.txt in the etherpad folder", file=sys.stderr) print(
"The APIKEY is the contents of the file APIKEY.txt in the etherpad folder",
file=sys.stderr,
)
apikey = input("Please paste the APIKEY: ").strip() apikey = input("Please paste the APIKEY: ").strip()
padinfo["apikey"] = apikey padinfo["apikey"] = apikey

View File

@ -1,11 +1,13 @@
import json
import os
import re
from argparse import ArgumentParser from argparse import ArgumentParser
import json, os, re from urllib.error import HTTPError, URLError
from urllib.parse import urlencode from urllib.parse import urlencode
from urllib.request import urlopen from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def group (items, key=lambda x: x):
def group(items, key=lambda x: x):
ret = [] ret = []
keys = {} keys = {}
for item in items: for item in items:
@ -18,6 +20,7 @@ def group (items, key=lambda x: x):
ret.append(keys[k]) ret.append(keys[k])
return ret return ret
def main(args): def main(args):
p = ArgumentParser("") p = ArgumentParser("")
p.add_argument("input", nargs="+", help="filenames") p.add_argument("input", nargs="+", help="filenames")
@ -28,10 +31,11 @@ def main(args):
inputs = [x for x in inputs if not os.path.isdir(x)] inputs = [x for x in inputs if not os.path.isdir(x)]
def base(x):
    """ strips a trailing .diff.html, .meta.json, .html or .txt extension from x """
    # All alternatives live in one group that is anchored as a whole. Before,
    # the `$` applied only to the .txt alternative, so the other extensions
    # were also removed when they appeared in the middle of a name.
    return re.sub(r"(\.diff\.html|\.meta\.json|\.html|\.txt)$", "", x)
#from pprint import pprint
#pprint() # from pprint import pprint
# pprint()
gg = group(inputs, base) gg = group(inputs, base)
for items in gg: for items in gg:
itembase = base(items[0]) itembase = base(items[0])
@ -41,5 +45,5 @@ def main(args):
pass pass
for i in items: for i in items:
newloc = os.path.join(itembase, i) newloc = os.path.join(itembase, i)
print ("'{0}' => '{1}'".format(i, newloc)) print("'{0}' => '{1}'".format(i, newloc))
os.rename(i, newloc) os.rename(i, newloc)

View File

@ -1,31 +1,40 @@
from argparse import ArgumentParser
import json import json
import sys import sys
from etherpump.commands.common import getjson from argparse import ArgumentParser
from urllib.parse import urlparse, urlunparse, urlencode from urllib.parse import urlencode, urlparse, urlunparse
from urllib.request import urlopen, URLError, HTTPError from urllib.request import HTTPError, URLError, urlopen
def main(args):
    """ calls the etherpad listAllPads API and prints the pad ids

    args: command-line arguments (without the program name)

    Reads "apiurl" and "apikey" from the settings file given by --padinfo.
    With --showurl only the request URL is printed; otherwise the pad ids are
    printed, one per line or as a JSON list (--format json).
    """
    p = ArgumentParser("call listAllPads and print the results")
    p.add_argument(
        "--padinfo",
        default=".etherpump/settings.json",
        # The help text names the real default (it used to say .etherdump).
        help="settings, default: .etherpump/settings.json",
    )
    p.add_argument("--showurl", default=False, action="store_true")
    p.add_argument(
        "--format",
        default="lines",
        help="output format: lines, json; default lines",
    )
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = info.get("apiurl")
    data = {'apikey': info['apikey']}
    requesturl = apiurl + 'listAllPads?' + urlencode(data)
    if args.showurl:
        print(requesturl)
        return

    results = getjson(requesturl)['data']['padIDs']
    if args.format == "json":
        print(json.dumps(results))
    else:
        for r in results:
            print(r)

View File

@ -1,17 +1,24 @@
from argparse import ArgumentParser
import json import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode from urllib.parse import urlencode
from urllib.request import urlopen from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args): def main(args):
p = ArgumentParser("call listAuthorsOfPad for the padid") p = ArgumentParser("call listAuthorsOfPad for the padid")
p.add_argument("padid", help="the padid") p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true") p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="lines", help="output format, can be: lines, json; default: lines") p.add_argument(
"--format",
default="lines",
help="output format, can be: lines, json; default: lines",
)
args = p.parse_args(args) args = p.parse_args(args)
with open(args.padinfo) as f: with open(args.padinfo) as f:
@ -20,13 +27,13 @@ def main(args):
data = {} data = {}
data['apikey'] = info['apikey'] data['apikey'] = info['apikey']
data['padID'] = args.padid.encode("utf-8") data['padID'] = args.padid.encode("utf-8")
requesturl = apiurl+'listAuthorsOfPad?'+urlencode(data) requesturl = apiurl + 'listAuthorsOfPad?' + urlencode(data)
if args.showurl: if args.showurl:
print (requesturl) print(requesturl)
else: else:
results = json.load(urlopen(requesturl))['data']['authorIDs'] results = json.load(urlopen(requesturl))['data']['authorIDs']
if args.format == "json": if args.format == "json":
print (json.dumps(results)) print(json.dumps(results))
else: else:
for r in results: for r in results:
print (r.encode("utf-8")) print(r.encode("utf-8"))

View File

@ -1,17 +1,20 @@
import json
import os
import re
import sys
import time
from argparse import ArgumentParser from argparse import ArgumentParser
import sys, json, re, os, time
from datetime import datetime from datetime import datetime
from time import sleep
from urllib.parse import quote, urlencode, urlparse, urlunparse
from urllib.request import HTTPError, URLError, urlopen
from jinja2 import Environment, FileSystemLoader
import dateutil.parser import dateutil.parser
import pypandoc import pypandoc
from urllib.parse import urlparse, urlunparse, urlencode, quote
from urllib.request import urlopen, URLError, HTTPError
from jinja2 import FileSystemLoader, Environment
from etherpump.commands.common import * from etherpump.commands.common import *
from time import sleep
import dateutil.parser
""" """
publication: publication:
@ -21,7 +24,8 @@ publication:
""" """
def group (items, key=lambda x: x):
def group(items, key=lambda x: x):
""" returns a list of lists, of items grouped by a key function """ """ returns a list of lists, of items grouped by a key function """
ret = [] ret = []
keys = {} keys = {}
@ -35,10 +39,12 @@ def group (items, key=lambda x: x):
ret.append(keys[k]) ret.append(keys[k])
return ret return ret
# def base (x): # def base (x):
# return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x) # return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
def splitextlong (x):
def splitextlong(x):
""" split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz') """ """ split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz') """
m = re.search(r"^(.*?)(\..*)$", x) m = re.search(r"^(.*?)(\..*)$", x)
if m: if m:
@ -46,20 +52,24 @@ def splitextlong (x):
else: else:
return x, '' return x, ''
def base(x):
    """ returns x without its "long" extension (first item of splitextlong) """
    parts = splitextlong(x)
    return parts[0]
def excerpt(t, chars=25):
    """ cuts t down to its first `chars` characters plus "..." when it is longer than that """
    if len(t) <= chars:
        # Short enough: hand the value back untouched.
        return t
    return t[:chars] + "..."
def absurl(url, base=None):
    """ returns url as an absolute URL, prefixing base when url is relative

    A url that already starts with http:// or https:// is returned unchanged.
    Anything else is treated as relative and base is prepended by plain string
    concatenation (no path normalisation is done).

    When no base is given, the relative url is returned as-is instead of
    failing on None + url.
    """
    # Check for the full scheme so that relative paths which merely begin with
    # the letters "http" (e.g. "httpdocs/pad.html") still get the base prefix.
    if url.startswith(("http://", "https://")):
        return url
    if base is None:
        return url
    return base + url
def url_base (url):
def url_base(url):
(scheme, netloc, path, params, query, fragment) = urlparse(url) (scheme, netloc, path, params, query, fragment) = urlparse(url)
path, _ = os.path.split(path.lstrip("/")) path, _ = os.path.split(path.lstrip("/"))
ret = urlunparse((scheme, netloc, path, None, None, None)) ret = urlunparse((scheme, netloc, path, None, None, None))
@ -67,45 +77,131 @@ def url_base (url):
ret += "/" ret += "/"
return ret return ret
def datetimeformat(t, format='%Y-%m-%d %H:%M:%S'):
    """ formats a timestamp as text

    t: either a date/time string (parsed with dateutil.parser) or a number of
       seconds since the epoch (rendered in local time)
    format: strftime pattern used for the result
    """
    # isinstance rather than an exact type comparison, so that str subclasses
    # are parsed as date strings too instead of being passed to time.localtime.
    if isinstance(t, str):
        return dateutil.parser.parse(t).strftime(format)
    return time.strftime(format, time.localtime(t))
def main (args):
def main(args):
p = ArgumentParser("Convert dumped files to a document via a template.") p = ArgumentParser("Convert dumped files to a document via a template.")
p.add_argument("input", nargs="+", help="Files to list (.meta.json files)") p.add_argument("input", nargs="+", help="Files to list (.meta.json files)")
p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in") p.add_argument(
p.add_argument("--template", default="publication.html", help="template name, built-ins include publication.html; default: publication.html") "--templatepath",
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: ./.etherdump/settings.json") default=None,
help="path to find templates, default: built-in",
)
p.add_argument(
"--template",
default="publication.html",
help="template name, built-ins include publication.html; default: publication.html",
)
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: ./.etherdump/settings.json",
)
# p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)") # p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
p.add_argument("--order", default="padid", help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid") p.add_argument(
p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)") "--order",
p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)") default="padid",
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid",
)
p.add_argument(
"--reverse",
default=False,
action="store_true",
help="reverse order, default: False (reverse chrono)",
)
p.add_argument(
"--limit",
type=int,
default=0,
help="limit to number of items, default: 0 (no limit)",
)
p.add_argument(
"--skip",
default=None,
type=int,
help="skip this many items, default: None",
)
p.add_argument("--content", default=False, action="store_true", help="rss: include (full) content tag, default: False") p.add_argument(
p.add_argument("--link", default="diffhtml,html,text", help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text") "--content",
p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl") default=False,
action="store_true",
help="rss: include (full) content tag, default: False",
)
p.add_argument(
"--link",
default="diffhtml,html,text",
help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text",
)
p.add_argument(
"--linkbase",
default=None,
help="base url to use for links, default: try to use the feedurl",
)
p.add_argument("--output", default=None, help="output, default: stdout") p.add_argument("--output", default=None, help="output, default: stdout")
p.add_argument("--files", default=False, action="store_true", help="include files (experimental)") p.add_argument(
"--files",
default=False,
action="store_true",
help="include files (experimental)",
)
pg = p.add_argument_group('template variables') pg = p.add_argument_group('template variables')
pg.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml") pg.add_argument(
pg.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url") "--feedurl",
pg.add_argument("--title", default="etherpump", help="title for document or rss feed channel title, default: etherdump") default="feed.xml",
pg.add_argument("--description", default="", help="rss: channel description, default: empty") help="rss: to use as feeds own (self) link, default: feed.xml",
pg.add_argument("--language", default="en-US", help="rss: feed language, default: en-US") )
pg.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily") pg.add_argument(
pg.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1") "--siteurl",
pg.add_argument("--generator", default="https://gitlab.com/activearchives/etherpump", help="generator, default: https://gitlab.com/activearchives/etherdump") default=None,
pg.add_argument("--timestamp", default=None, help="timestamp, default: now (e.g. 2015-12-01 12:30:00)") help="rss: to use as channel's site link, default: the etherpad url",
)
pg.add_argument(
"--title",
default="etherpump",
help="title for document or rss feed channel title, default: etherdump",
)
pg.add_argument(
"--description",
default="",
help="rss: channel description, default: empty",
)
pg.add_argument(
"--language", default="en-US", help="rss: feed language, default: en-US"
)
pg.add_argument(
"--updatePeriod",
default="daily",
help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily",
)
pg.add_argument(
"--updateFrequency",
default=1,
type=int,
help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1",
)
pg.add_argument(
"--generator",
default="https://gitlab.com/activearchives/etherpump",
help="generator, default: https://gitlab.com/activearchives/etherdump",
)
pg.add_argument(
"--timestamp",
default=None,
help="timestamp, default: now (e.g. 2015-12-01 12:30:00)",
)
pg.add_argument("--next", default=None, help="next link, default: None)") pg.add_argument("--next", default=None, help="next link, default: None)")
pg.add_argument("--prev", default=None, help="prev link, default: None") pg.add_argument("--prev", default=None, help="prev link, default: None")
@ -130,17 +226,12 @@ def main (args):
# Use "base" to strip (longest) extensions # Use "base" to strip (longest) extensions
# inputs = group(inputs, base) # inputs = group(inputs, base)
def wrappath(p):
    """Describe a local file path as a version record.

    Returns a dict whose "url" and "path" are both ``p`` prefixed with "./",
    whose "code" is fixed at 200, and whose "type" is the final extension of
    ``p`` without the leading dot (an empty string when ``p`` has none).
    """
    relative = "./{0}".format(p)
    _, dotted_ext = os.path.splitext(p)
    # Keyword construction keeps the same key order as the literal form.
    return dict(url=relative, path=relative, code=200, type=dotted_ext[1:])
def metaforpaths (paths): def metaforpaths(paths):
ret = {} ret = {}
pid = base(paths[0]) pid = base(paths[0])
ret['pad'] = ret['padid'] = pid ret['pad'] = ret['padid'] = pid
@ -150,7 +241,9 @@ def main (args):
mtime = os.stat(p).st_mtime mtime = os.stat(p).st_mtime
if lastedited == None or mtime > lastedited: if lastedited == None or mtime > lastedited:
lastedited = mtime lastedited = mtime
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime("%Y-%m-%dT%H:%M:%S") ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime(
"%Y-%m-%dT%H:%M:%S"
)
ret["lastedited_raw"] = mtime ret["lastedited_raw"] = mtime
return ret return ret
@ -170,7 +263,7 @@ def main (args):
# else: # else:
# return metaforpaths(paths) # return metaforpaths(paths)
def fixdates (padmeta): def fixdates(padmeta):
d = dateutil.parser.parse(padmeta["lastedited_iso"]) d = dateutil.parser.parse(padmeta["lastedited_iso"])
padmeta["lastedited"] = d padmeta["lastedited"] = d
padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000") padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000")
@ -181,17 +274,21 @@ def main (args):
pads = list(map(fixdates, pads)) pads = list(map(fixdates, pads))
args.pads = list(pads) args.pads = list(pads)
def could_have_base(x, y):
    """Tell whether ``x`` is ``y`` itself or ``y`` followed by a "." suffix.

    True for ("foo", "foo") and ("foo.txt", "foo"); False for
    ("foobar.txt", "foo") because the text after the prefix does not start
    with a dot.
    """
    if x == y:
        return True
    if not x.startswith(y):
        return False
    remainder = x[len(y):]
    return remainder.startswith(".")
def get_best_pad(x):
    """Return the first pad whose base name could be the base of ``x``.

    Walks ``padbases`` in order, looks each one up in ``pads_by_base`` and
    returns the entry for the first base accepted by ``could_have_base``.
    Returns None when no base matches.
    """
    for candidate_base in padbases:
        candidate = pads_by_base[candidate_base]
        if not could_have_base(x, candidate_base):
            continue
        return candidate
    return None
def has_version(padinfo, path):
    """Collect the versions of a pad that already point at ``path``.

    Returns the entries of ``padinfo['versions']`` that have a 'path' key
    equal to "./" + ``path``. The list is empty when there is no such entry,
    so the result can be used directly as a truth value.
    """
    matches = []
    for version in padinfo['versions']:
        if 'path' not in version:
            continue
        if version['path'] == "./" + path:
            matches.append(version)
    return matches
if args.files: if args.files:
inputs = args.input inputs = args.input
@ -209,25 +306,33 @@ def main (args):
# print ("PADBASES", file=sys.stderr) # print ("PADBASES", file=sys.stderr)
# for pb in padbases: # for pb in padbases:
# print (" ", pb, file=sys.stderr) # print (" ", pb, file=sys.stderr)
print ("pairing input files with pads", file=sys.stderr) print("pairing input files with pads", file=sys.stderr)
for x in inputs: for x in inputs:
# pair input with a pad if possible # pair input with a pad if possible
xbasename = os.path.basename(x) xbasename = os.path.basename(x)
p = get_best_pad(xbasename) p = get_best_pad(xbasename)
if p: if p:
if not has_version(p, x): if not has_version(p, x):
print ("Grouping file {0} with pad {1}".format(x, p['padid']), file=sys.stderr) print(
"Grouping file {0} with pad {1}".format(x, p['padid']),
file=sys.stderr,
)
p['versions'].append(wrappath(x)) p['versions'].append(wrappath(x))
else: else:
print ("Skipping existing version {0} ({1})...".format(x, p['padid']), file=sys.stderr) print(
"Skipping existing version {0} ({1})...".format(
x, p['padid']
),
file=sys.stderr,
)
removelist.append(x) removelist.append(x)
# Removed Matches files # Removed Matches files
for x in removelist: for x in removelist:
inputs.remove(x) inputs.remove(x)
print ("Remaining files:", file=sys.stderr) print("Remaining files:", file=sys.stderr)
for x in inputs: for x in inputs:
print (x, file=sys.stderr) print(x, file=sys.stderr)
print (file=sys.stderr) print(file=sys.stderr)
# Add "fake" pads for remaining files # Add "fake" pads for remaining files
for x in inputs: for x in inputs:
args.pads.append(metaforpaths([x])) args.pads.append(metaforpaths([x]))
@ -243,7 +348,9 @@ def main (args):
# order items & apply limit # order items & apply limit
if args.order == "lastedited": if args.order == "lastedited":
args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=args.reverse) args.pads.sort(
key=lambda x: x.get("lastedited_iso"), reverse=args.reverse
)
elif args.order == "pad": elif args.order == "pad":
args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse) args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse)
elif args.order == "padid": elif args.order == "padid":
@ -251,17 +358,20 @@ def main (args):
elif args.order == "revisions": elif args.order == "revisions":
args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse) args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse)
elif args.order == "authors": elif args.order == "authors":
args.pads.sort(key=lambda x: len(x.get("authors")), reverse=args.reverse) args.pads.sort(
key=lambda x: len(x.get("authors")), reverse=args.reverse
)
elif args.order == "custom": elif args.order == "custom":
# TODO: make this list non-static, but a variable that can be given from the CLI # TODO: make this list non-static, but a variable that can be given from the CLI
customorder = [ customorder = [
'nooo.relearn.preamble', 'nooo.relearn.preamble',
'nooo.relearn.activating.the.archive', 'nooo.relearn.activating.the.archive',
'nooo.relearn.call.for.proposals', 'nooo.relearn.call.for.proposals',
'nooo.relearn.call.for.proposals-proposal-footnote', 'nooo.relearn.call.for.proposals-proposal-footnote',
'nooo.relearn.colophon'] 'nooo.relearn.colophon',
]
order = [] order = []
for x in customorder: for x in customorder:
for pad in args.pads: for pad in args.pads:
@ -272,7 +382,7 @@ def main (args):
raise Exception("That ordering is not implemented!") raise Exception("That ordering is not implemented!")
if args.limit: if args.limit:
args.pads = args.pads[:args.limit] args.pads = args.pads[: args.limit]
# add versions_by_type, add in full text # add versions_by_type, add in full text
# add link (based on args.link) # add link (based on args.link)
@ -289,7 +399,7 @@ def main (args):
if "text" in versions_by_type: if "text" in versions_by_type:
# try: # try:
with open (versions_by_type["text"]["path"]) as f: with open(versions_by_type["text"]["path"]) as f:
content = f.read() content = f.read()
# print('content:', content) # print('content:', content)
# [Relearn] Add pandoc command here? # [Relearn] Add pandoc command here?
@ -297,7 +407,7 @@ def main (args):
# print('html:', html) # print('html:', html)
p["text"] = html p["text"] = html
# except FileNotFoundError: # except FileNotFoundError:
# p['text'] = 'ERROR' # p['text'] = 'ERROR'
# ADD IN LINK TO PAD AS "link" # ADD IN LINK TO PAD AS "link"
for v in linkversions: for v in linkversions:
@ -312,6 +422,6 @@ def main (args):
if args.output: if args.output:
with open(args.output, "w") as f: with open(args.output, "w") as f:
print (template.render(vars(args)), file=f) print(template.render(vars(args)), file=f)
else: else:
print (template.render(vars(args))) print(template.render(vars(args)))

View File

@ -1,17 +1,19 @@
import json
import os
import re
import sys
from argparse import ArgumentParser from argparse import ArgumentParser
import sys, json, re, os
from datetime import datetime from datetime import datetime
from fnmatch import fnmatch
from time import sleep
from urllib.parse import quote, urlencode
from urllib.request import HTTPError, URLError, urlopen
from xml.etree import ElementTree as ET
from urllib.parse import urlencode, quote import html5lib
from urllib.request import urlopen, URLError, HTTPError
from etherpump.commands.common import * from etherpump.commands.common import *
from time import sleep
from etherpump.commands.html5tidy import html5tidy from etherpump.commands.html5tidy import html5tidy
import html5lib
from xml.etree import ElementTree as ET
from fnmatch import fnmatch
# debugging # debugging
# import ElementTree as ET # import ElementTree as ET
@ -28,43 +30,144 @@ use/prefer public interfaces ? (export functions)
""" """
def try_deleting(files):
    """Remove every path in ``files``, ignoring the ones that cannot be removed.

    Each path is passed to ``os.remove``; an OSError raised for one path is
    swallowed so the remaining paths are still attempted. Returns None.
    """
    for path in files:
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup: a failed removal is not treated as an error.
            continue
def main (args):
p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)") def main(args):
p = ArgumentParser(
"Check for pads that have changed since last sync (according to .meta.json)"
)
p.add_argument("padid", nargs="*", default=[]) p.add_argument("padid", nargs="*", default=[])
p.add_argument("--glob", default=False, help="download pads matching a glob pattern") p.add_argument(
"--glob", default=False, help="download pads matching a glob pattern"
)
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherpump/settings.json") p.add_argument(
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)") "--padinfo",
p.add_argument("--pub", default="p", help="folder to store files for public pads, default: p") default=".etherpump/settings.json",
p.add_argument("--group", default="g", help="folder to store files for group pads, default: g") help="settings, default: .etherpump/settings.json",
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") )
p.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False") p.add_argument(
p.add_argument("--text", default=False, action="store_true", help="download text to PADID.txt, default: False") "--zerorevs",
p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False") default=False,
p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.diff.html, default: False") action="store_true",
p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False") help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)",
p.add_argument("--folder", default=False, action="store_true", help="dump files in a folder named PADID (meta, text, html, dhtml), default: False") )
p.add_argument("--output", default=False, action="store_true", help="output changed padids on stdout") p.add_argument(
p.add_argument("--force", default=False, action="store_true", help="reload, even if revisions count matches previous") "--pub",
p.add_argument("--no-raw-ext", default=False, action="store_true", help="save plain text as padname with no (additional) extension") default="p",
p.add_argument("--fix-names", default=False, action="store_true", help="normalize padid's (no spaces, special control chars) for use in file names") help="folder to store files for public pads, default: p",
)
p.add_argument(
"--group",
default="g",
help="folder to store files for group pads, default: g",
)
p.add_argument(
"--skip",
default=None,
type=int,
help="skip this many items, default: None",
)
p.add_argument(
"--meta",
default=False,
action="store_true",
help="download meta to PADID.meta.json, default: False",
)
p.add_argument(
"--text",
default=False,
action="store_true",
help="download text to PADID.txt, default: False",
)
p.add_argument(
"--html",
default=False,
action="store_true",
help="download html to PADID.html, default: False",
)
p.add_argument(
"--dhtml",
default=False,
action="store_true",
help="download dhtml to PADID.diff.html, default: False",
)
p.add_argument(
"--all",
default=False,
action="store_true",
help="download all files (meta, text, html, dhtml), default: False",
)
p.add_argument(
"--folder",
default=False,
action="store_true",
help="dump files in a folder named PADID (meta, text, html, dhtml), default: False",
)
p.add_argument(
"--output",
default=False,
action="store_true",
help="output changed padids on stdout",
)
p.add_argument(
"--force",
default=False,
action="store_true",
help="reload, even if revisions count matches previous",
)
p.add_argument(
"--no-raw-ext",
default=False,
action="store_true",
help="save plain text as padname with no (additional) extension",
)
p.add_argument(
"--fix-names",
default=False,
action="store_true",
help="normalize padid's (no spaces, special control chars) for use in file names",
)
p.add_argument("--filter-ext", default=None, help="filter pads by extension") p.add_argument(
"--filter-ext", default=None, help="filter pads by extension"
)
p.add_argument("--css", default="/styles.css", help="add css url to output pages, default: /styles.css") p.add_argument(
p.add_argument("--script", default="/versions.js", help="add script url to output pages, default: /versions.js") "--css",
default="/styles.css",
help="add css url to output pages, default: /styles.css",
)
p.add_argument(
"--script",
default="/versions.js",
help="add script url to output pages, default: /versions.js",
)
p.add_argument("--nopublish", default="__NOPUBLISH__", help="no publish magic word, default: __NOPUBLISH__") p.add_argument(
p.add_argument("--publish", default="__PUBLISH__", help="the publish magic word, default: __PUBLISH__") "--nopublish",
p.add_argument("--publish-opt-in", default=False, action="store_true", help="ensure `--publish` is honoured instead of `--nopublish`") default="__NOPUBLISH__",
help="no publish magic word, default: __NOPUBLISH__",
)
p.add_argument(
"--publish",
default="__PUBLISH__",
help="the publish magic word, default: __PUBLISH__",
)
p.add_argument(
"--publish-opt-in",
default=False,
action="store_true",
help="ensure `--publish` is honoured instead of `--nopublish`",
)
args = p.parse_args(args) args = p.parse_args(args)
@ -79,16 +182,20 @@ def main (args):
if args.padid: if args.padid:
padids = args.padid padids = args.padid
elif args.glob: elif args.glob:
padids = getjson(info['localapiurl']+'listAllPads?'+urlencode(data))['data']['padIDs'] padids = getjson(
info['localapiurl'] + 'listAllPads?' + urlencode(data)
)['data']['padIDs']
padids = [x for x in padids if fnmatch(x, args.glob)] padids = [x for x in padids if fnmatch(x, args.glob)]
else: else:
padids = getjson(info['localapiurl']+'listAllPads?'+urlencode(data))['data']['padIDs'] padids = getjson(
info['localapiurl'] + 'listAllPads?' + urlencode(data)
)['data']['padIDs']
padids.sort() padids.sort()
numpads = len(padids) numpads = len(padids)
# maxmsglen = 0 # maxmsglen = 0
count = 0 count = 0
for i, padid in enumerate(padids): for i, padid in enumerate(padids):
if args.skip != None and i<args.skip: if args.skip != None and i < args.skip:
continue continue
progressbar(i, numpads, padid) progressbar(i, numpads, padid)
@ -110,47 +217,73 @@ def main (args):
if os.path.exists(metapath): if os.path.exists(metapath):
with open(metapath) as f: with open(metapath) as f:
meta.update(json.load(f)) meta.update(json.load(f))
revisions = getjson(info['localapiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] revisions = getjson(
info['localapiurl']
+ 'getRevisionsCount?'
+ urlencode(data)
)['data']['revisions']
if meta['revisions'] == revisions and not args.force: if meta['revisions'] == revisions and not args.force:
skip=True skip = True
break break
meta['padid'] = padid # .encode("utf-8") meta['padid'] = padid # .encode("utf-8")
versions = meta["versions"] = [] versions = meta["versions"] = []
versions.append({ versions.append(
"url": padurlbase + quote(padid), {
"type": "pad", "url": padurlbase + quote(padid),
"code": 200 "type": "pad",
}) "code": 200,
}
)
if revisions == None: if revisions == None:
meta['revisions'] = getjson(info['localapiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] meta['revisions'] = getjson(
info['localapiurl']
+ 'getRevisionsCount?'
+ urlencode(data)
)['data']['revisions']
else: else:
meta['revisions' ] = revisions meta['revisions'] = revisions
if (meta['revisions'] == 0) and (not args.zerorevs): if (meta['revisions'] == 0) and (not args.zerorevs):
# print("Skipping zero revs", file=sys.stderr) # print("Skipping zero revs", file=sys.stderr)
skip=True skip = True
break break
# todo: load more metadata! # todo: load more metadata!
meta['group'], meta['pad'] = splitpadname(padid) meta['group'], meta['pad'] = splitpadname(padid)
meta['pathbase'] = p meta['pathbase'] = p
meta['lastedited_raw'] = int(getjson(info['localapiurl']+'getLastEdited?'+urlencode(data))['data']['lastEdited']) meta['lastedited_raw'] = int(
meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat() getjson(
meta['author_ids'] = getjson(info['localapiurl']+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs'] info['localapiurl'] + 'getLastEdited?' + urlencode(data)
)['data']['lastEdited']
)
meta['lastedited_iso'] = datetime.fromtimestamp(
int(meta['lastedited_raw']) / 1000
).isoformat()
meta['author_ids'] = getjson(
info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
)['data']['authorIDs']
break break
except HTTPError as e: except HTTPError as e:
tries += 1 tries += 1
if tries > 3: if tries > 3:
print ("Too many failures ({0}), skipping".format(padid), file=sys.stderr) print(
skip=True "Too many failures ({0}), skipping".format(padid),
file=sys.stderr,
)
skip = True
break break
else: else:
sleep(3) sleep(3)
except TypeError as e: except TypeError as e:
print ("Type Error loading pad {0} (phantom pad?), skipping".format(padid), file=sys.stderr) print(
skip=True "Type Error loading pad {0} (phantom pad?), skipping".format(
padid
),
file=sys.stderr,
)
skip = True
break break
if skip: if skip:
@ -159,7 +292,7 @@ def main (args):
count += 1 count += 1
if args.output: if args.output:
print (padid) print(padid)
if args.all or (args.meta or args.text or args.html or args.dhtml): if args.all or (args.meta or args.text or args.html or args.dhtml):
try: try:
@ -168,7 +301,7 @@ def main (args):
pass pass
if args.all or args.text: if args.all or args.text:
text = getjson(info['localapiurl']+'getText?'+urlencode(data)) text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))
ver = {"type": "text"} ver = {"type": "text"}
versions.append(ver) versions.append(ver)
ver["code"] = text["_code"] ver["code"] = text["_code"]
@ -180,17 +313,31 @@ def main (args):
########################################## ##########################################
if args.nopublish and args.nopublish in text: if args.nopublish and args.nopublish in text:
# NEED TO PURGE ANY EXISTING DOCS # NEED TO PURGE ANY EXISTING DOCS
try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json")) try_deleting(
(
p + raw_ext,
p + ".raw.html",
p + ".diff.html",
p + ".meta.json",
)
)
continue continue
########################################## ##########################################
## ENFORCE __PUBLISH__ MAGIC WORD ## ENFORCE __PUBLISH__ MAGIC WORD
########################################## ##########################################
if args.publish_opt_in and args.publish not in text: if args.publish_opt_in and args.publish not in text:
try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json")) try_deleting(
(
p + raw_ext,
p + ".raw.html",
p + ".diff.html",
p + ".meta.json",
)
)
continue continue
ver["path"] = p+raw_ext ver["path"] = p + raw_ext
ver["url"] = quote(ver["path"]) ver["url"] = quote(ver["path"])
with open(ver["path"], "w") as f: with open(ver["path"], "w") as f:
f.write(text) f.write(text)
@ -199,38 +346,86 @@ def main (args):
links = [] links = []
if args.css: if args.css:
links.append({"href":args.css, "rel":"stylesheet"}) links.append({"href": args.css, "rel": "stylesheet"})
# todo, make this process reflect which files actually were made # todo, make this process reflect which files actually were made
versionbaseurl = quote(padid) versionbaseurl = quote(padid)
links.append({"href":versions[0]["url"], "rel":"alternate", "type":"text/html", "title":"Etherpad"}) links.append(
{
"href": versions[0]["url"],
"rel": "alternate",
"type": "text/html",
"title": "Etherpad",
}
)
if args.all or args.text: if args.all or args.text:
links.append({"href":versionbaseurl+raw_ext, "rel":"alternate", "type":"text/plain", "title":"Plain text"}) links.append(
{
"href": versionbaseurl + raw_ext,
"rel": "alternate",
"type": "text/plain",
"title": "Plain text",
}
)
if args.all or args.html: if args.all or args.html:
links.append({"href":versionbaseurl+".raw.html", "rel":"alternate", "type":"text/html", "title":"HTML"}) links.append(
{
"href": versionbaseurl + ".raw.html",
"rel": "alternate",
"type": "text/html",
"title": "HTML",
}
)
if args.all or args.dhtml: if args.all or args.dhtml:
links.append({"href":versionbaseurl+".diff.html", "rel":"alternate", "type":"text/html", "title":"HTML with author colors"}) links.append(
{
"href": versionbaseurl + ".diff.html",
"rel": "alternate",
"type": "text/html",
"title": "HTML with author colors",
}
)
if args.all or args.meta: if args.all or args.meta:
links.append({"href":versionbaseurl+".meta.json", "rel":"alternate", "type":"application/json", "title":"Meta data"}) links.append(
{
"href": versionbaseurl + ".meta.json",
"rel": "alternate",
"type": "application/json",
"title": "Meta data",
}
)
# links.append({"href":"/", "rel":"search", "type":"text/html", "title":"Index"}) # links.append({"href":"/", "rel":"search", "type":"text/html", "title":"Index"})
if args.all or args.dhtml: if args.all or args.dhtml:
data['startRev'] = "0" data['startRev'] = "0"
html = getjson(info['localapiurl']+'createDiffHTML?'+urlencode(data)) html = getjson(
info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
)
ver = {"type": "diffhtml"} ver = {"type": "diffhtml"}
versions.append(ver) versions.append(ver)
ver["code"] = html["_code"] ver["code"] = html["_code"]
if html["_code"] == 200: if html["_code"] == 200:
try: try:
html = html['data']['html'] html = html['data']['html']
ver["path"] = p+".diff.html" ver["path"] = p + ".diff.html"
ver["url"] = quote(ver["path"]) ver["url"] = quote(ver["path"])
# doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False) # doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False) doc = html5lib.parse(
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links) html, treebuilder="etree", namespaceHTMLElements=False
)
html5tidy(
doc,
indent=True,
title=padid,
scripts=args.script,
links=links,
)
with open(ver["path"], "w") as f: with open(ver["path"], "w") as f:
# f.write(html.encode("utf-8")) # f.write(html.encode("utf-8"))
print(ET.tostring(doc, method="html", encoding="unicode"), file=f) print(
ET.tostring(doc, method="html", encoding="unicode"),
file=f,
)
except TypeError: except TypeError:
# Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file! # Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file!
ver["message"] = html["message"] ver["message"] = html["message"]
@ -239,19 +434,30 @@ def main (args):
# Process text, html, dhtml, all options # Process text, html, dhtml, all options
if args.all or args.html: if args.all or args.html:
html = getjson(info['localapiurl']+'getHTML?'+urlencode(data)) html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))
ver = {"type": "html"} ver = {"type": "html"}
versions.append(ver) versions.append(ver)
ver["code"] = html["_code"] ver["code"] = html["_code"]
if html["_code"] == 200: if html["_code"] == 200:
html = html['data']['html'] html = html['data']['html']
ver["path"] = p+".raw.html" ver["path"] = p + ".raw.html"
ver["url"] = quote(ver["path"]) ver["url"] = quote(ver["path"])
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False) doc = html5lib.parse(
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links) html, treebuilder="etree", namespaceHTMLElements=False
)
html5tidy(
doc,
indent=True,
title=padid,
scripts=args.script,
links=links,
)
with open(ver["path"], "w") as f: with open(ver["path"], "w") as f:
# f.write(html.encode("utf-8")) # f.write(html.encode("utf-8"))
print (ET.tostring(doc, method="html", encoding="unicode"), file=f) print(
ET.tostring(doc, method="html", encoding="unicode"),
file=f,
)
# output meta # output meta
if args.all or args.meta: if args.all or args.meta:

View File

@ -1,14 +1,18 @@
from argparse import ArgumentParser
import json import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode from urllib.parse import urlencode
from urllib.request import urlopen from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args): def main(args):
p = ArgumentParser("call getRevisionsCount for the given padid") p = ArgumentParser("call getRevisionsCount for the given padid")
p.add_argument("padid", help="the padid") p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true") p.add_argument("--showurl", default=False, action="store_true")
args = p.parse_args(args) args = p.parse_args(args)
@ -18,9 +22,9 @@ def main(args):
data = {} data = {}
data['apikey'] = info['apikey'] data['apikey'] = info['apikey']
data['padID'] = args.padid.encode("utf-8") data['padID'] = args.padid.encode("utf-8")
requesturl = apiurl+'getRevisionsCount?'+urlencode(data) requesturl = apiurl + 'getRevisionsCount?' + urlencode(data)
if args.showurl: if args.showurl:
print (requesturl) print(requesturl)
else: else:
results = json.load(urlopen(requesturl))['data']['revisions'] results = json.load(urlopen(requesturl))['data']['revisions']
print (results) print(results)

View File

@ -1,39 +1,60 @@
import json
import sys
from argparse import ArgumentParser from argparse import ArgumentParser
import json, sys from urllib.error import HTTPError, URLError
from urllib.parse import urlencode from urllib.parse import urlencode
from urllib.request import urlopen from urllib.request import urlopen
from urllib.error import HTTPError, URLError
import requests import requests
LIMIT_BYTES = 100 * 1000
LIMIT_BYTES = 100*1000
def main(args): def main(args):
p = ArgumentParser("calls the setHTML API function for the given padid") p = ArgumentParser("calls the setHTML API function for the given padid")
p.add_argument("padid", help="the padid") p.add_argument("padid", help="the padid")
p.add_argument("--html", default=None, help="html, default: read from stdin") p.add_argument(
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") "--html", default=None, help="html, default: read from stdin"
)
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true") p.add_argument("--showurl", default=False, action="store_true")
# p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") # p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument("--create", default=False, action="store_true", help="flag to create pad if necessary") p.add_argument(
p.add_argument("--limit", default=False, action="store_true", help="limit text to 100k (etherpad limit)") "--create",
default=False,
action="store_true",
help="flag to create pad if necessary",
)
p.add_argument(
"--limit",
default=False,
action="store_true",
help="limit text to 100k (etherpad limit)",
)
args = p.parse_args(args) args = p.parse_args(args)
with open(args.padinfo) as f: with open(args.padinfo) as f:
info = json.load(f) info = json.load(f)
apiurl = info.get("apiurl") apiurl = info.get("apiurl")
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info) # apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
# data = {} # data = {}
# data['apikey'] = info['apikey'] # data['apikey'] = info['apikey']
# data['padID'] = args.padid # is utf-8 encoded # data['padID'] = args.padid # is utf-8 encoded
createPad = False createPad = False
if args.create: if args.create:
# check if it's in fact necessary # check if it's in fact necessary
requesturl = apiurl+'getRevisionsCount?'+urlencode({'apikey': info['apikey'], 'padID': args.padid}) requesturl = (
apiurl
+ 'getRevisionsCount?'
+ urlencode({'apikey': info['apikey'], 'padID': args.padid})
)
results = json.load(urlopen(requesturl)) results = json.load(urlopen(requesturl))
print (json.dumps(results, indent=2), file=sys.stderr) print(json.dumps(results, indent=2), file=sys.stderr)
if results['code'] != 0: if results['code'] != 0:
createPad = True createPad = True
@ -47,21 +68,27 @@ def main(args):
params['padID'] = args.padid params['padID'] = args.padid
if createPad: if createPad:
requesturl = apiurl+'createPad' requesturl = apiurl + 'createPad'
if args.showurl: if args.showurl:
print (requesturl) print(requesturl)
results = requests.post(requesturl, params=params, data={'text': ''}) # json.load(urlopen(requesturl)) results = requests.post(
requesturl, params=params, data={'text': ''}
) # json.load(urlopen(requesturl))
results = json.loads(results.text) results = json.loads(results.text)
print (json.dumps(results, indent=2)) print(json.dumps(results, indent=2))
if len(html) > LIMIT_BYTES and args.limit:
    # Log the size of the payload that is actually being truncated; this
    # function works on `html`, and no `text` variable is assigned in the
    # visible part of it.
    # NOTE(review): len() counts characters when html is a str, so the cut
    # may not match LIMIT_BYTES in encoded bytes — confirm the intended unit.
    print("limiting", len(html), LIMIT_BYTES, file=sys.stderr)
    html = html[:LIMIT_BYTES]
requesturl = apiurl+'setHTML' requesturl = apiurl + 'setHTML'
if args.showurl: if args.showurl:
print (requesturl) print(requesturl)
# params['html'] = html # params['html'] = html
results = requests.post(requesturl, params={'apikey': info['apikey']}, data={'apikey': info['apikey'], 'padID': args.padid, 'html': html}) # json.load(urlopen(requesturl)) results = requests.post(
requesturl,
params={'apikey': info['apikey']},
data={'apikey': info['apikey'], 'padID': args.padid, 'html': html},
) # json.load(urlopen(requesturl))
results = json.loads(results.text) results = json.loads(results.text)
print (json.dumps(results, indent=2)) print(json.dumps(results, indent=2))

View File

@ -1,24 +1,39 @@
import json
import sys
from argparse import ArgumentParser from argparse import ArgumentParser
import json, sys from urllib.parse import quote, urlencode
from urllib.request import HTTPError, URLError, urlopen
from urllib.parse import urlencode, quote
from urllib.request import urlopen, URLError, HTTPError
import requests import requests
LIMIT_BYTES = 100 * 1000
LIMIT_BYTES = 100*1000
def main(args): def main(args):
p = ArgumentParser("calls the getText API function for the given padid") p = ArgumentParser("calls the getText API function for the given padid")
p.add_argument("padid", help="the padid") p.add_argument("padid", help="the padid")
p.add_argument("--text", default=None, help="text, default: read from stdin") p.add_argument(
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") "--text", default=None, help="text, default: read from stdin"
)
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true") p.add_argument("--showurl", default=False, action="store_true")
# p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") # p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument("--create", default=False, action="store_true", help="flag to create pad if necessary") p.add_argument(
p.add_argument("--limit", default=False, action="store_true", help="limit text to 100k (etherpad limit)") "--create",
default=False,
action="store_true",
help="flag to create pad if necessary",
)
p.add_argument(
"--limit",
default=False,
action="store_true",
help="limit text to 100k (etherpad limit)",
)
args = p.parse_args(args) args = p.parse_args(args)
with open(args.padinfo) as f: with open(args.padinfo) as f:
@ -27,11 +42,11 @@ def main(args):
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info) # apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
data = {} data = {}
data['apikey'] = info['apikey'] data['apikey'] = info['apikey']
data['padID'] = args.padid # is utf-8 encoded data['padID'] = args.padid # is utf-8 encoded
createPad = False createPad = False
if args.create: if args.create:
requesturl = apiurl+'getRevisionsCount?'+urlencode(data) requesturl = apiurl + 'getRevisionsCount?' + urlencode(data)
results = json.load(urlopen(requesturl)) results = json.load(urlopen(requesturl))
# print (json.dumps(results, indent=2)) # print (json.dumps(results, indent=2))
if results['code'] != 0: if results['code'] != 0:
@ -43,20 +58,26 @@ def main(args):
text = sys.stdin.read() text = sys.stdin.read()
if len(text) > LIMIT_BYTES and args.limit: if len(text) > LIMIT_BYTES and args.limit:
print ("limiting", len(text), LIMIT_BYTES) print("limiting", len(text), LIMIT_BYTES)
text = text[:LIMIT_BYTES] text = text[:LIMIT_BYTES]
data['text'] = text data['text'] = text
if createPad: if createPad:
requesturl = apiurl+'createPad' requesturl = apiurl + 'createPad'
else: else:
requesturl = apiurl+'setText' requesturl = apiurl + 'setText'
if args.showurl: if args.showurl:
print (requesturl) print(requesturl)
results = requests.post(requesturl, params=data) # json.load(urlopen(requesturl)) results = requests.post(
requesturl, params=data
) # json.load(urlopen(requesturl))
results = json.loads(results.text) results = json.loads(results.text)
if results['code'] != 0: if results['code'] != 0:
print ("setText: ERROR ({0}) on pad {1}: {2}".format(results['code'], args.padid, results['message'])) print(
"setText: ERROR ({0}) on pad {1}: {2}".format(
results['code'], args.padid, results['message']
)
)
# json.dumps(results, indent=2) # json.dumps(results, indent=2)

View File

@ -1,17 +1,25 @@
import json
import re
import sys
from argparse import ArgumentParser from argparse import ArgumentParser
import json, sys, re
from .common import * from .common import *
""" """
Extract and output selected fields of metadata Extract and output selected fields of metadata
""" """
def main (args):
p = ArgumentParser("extract & display meta data from a specific .meta.json file, or for a given padid (nb: it still looks for a .meta.json file)") def main(args):
p = ArgumentParser(
"extract & display meta data from a specific .meta.json file, or for a given padid (nb: it still looks for a .meta.json file)"
)
p.add_argument("--path", default=None, help="read from a meta.json file") p.add_argument("--path", default=None, help="read from a meta.json file")
p.add_argument("--padid", default=None, help="read meta for this padid") p.add_argument("--padid", default=None, help="read meta for this padid")
p.add_argument("--format", default="{padid}", help="format str, default: {padid}") p.add_argument(
"--format", default="{padid}", help="format str, default: {padid}"
)
args = p.parse_args(args) args = p.parse_args(args)
path = args.path path = args.path
@ -19,7 +27,7 @@ def main (args):
path = padpath(args.padid) + ".meta.json" path = padpath(args.padid) + ".meta.json"
if not path: if not path:
print ("Must specify either --path or --padid") print("Must specify either --path or --padid")
sys.exit(-1) sys.exit(-1)
with open(path) as f: with open(path) as f:
@ -27,5 +35,4 @@ def main (args):
formatstr = args.format.decode("utf-8") formatstr = args.format.decode("utf-8")
formatstr = re.sub(r"{(\w+)}", r"{0[\1]}", formatstr) formatstr = re.sub(r"{(\w+)}", r"{0[\1]}", formatstr)
print (formatstr.format(meta).encode("utf-8")) print(formatstr.format(meta).encode("utf-8"))

View File

@ -1,13 +1,17 @@
import json
import os
import re
import sys
from argparse import ArgumentParser from argparse import ArgumentParser
import sys, json, re, os
from datetime import datetime from datetime import datetime
from math import ceil, floor
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode from urllib.parse import urlencode
from urllib.request import urlopen from urllib.request import urlopen
from urllib.error import HTTPError, URLError
from math import ceil, floor
from .common import * from .common import *
""" """
status (meta): status (meta):
Update meta data files for those that have changed. Update meta data files for those that have changed.
@ -22,16 +26,18 @@ complicates the "syncing" idea....
""" """
class PadItemException (Exception):
class PadItemException(Exception):
pass pass
class PadItem ():
def __init__ (self, padid=None, path=None, padexists=False): class PadItem:
def __init__(self, padid=None, path=None, padexists=False):
self.padexists = padexists self.padexists = padexists
if padid and path: if padid and path:
raise PadItemException("only give padid or path") raise PadItemException("only give padid or path")
if not (padid or path): if not (padid or path):
raise PadItemException("either padid or path must be specified") raise PadItemException("either padid or path must be specified")
if padid: if padid:
self.padid = padid self.padid = padid
self.path = padpath(padid, group_path="g") self.path = padpath(padid, group_path="g")
@ -40,7 +46,7 @@ class PadItem ():
self.padid = padpath2id(path) self.padid = padpath2id(path)
@property @property
def status (self): def status(self):
if self.fileexists: if self.fileexists:
if self.padexists: if self.padexists:
return "S" return "S"
@ -52,26 +58,77 @@ class PadItem ():
return "?" return "?"
@property @property
def fileexists (self): def fileexists(self):
return os.path.exists(self.path) return os.path.exists(self.path)
def ignore_p (path, settings=None):
def ignore_p(path, settings=None):
if path.startswith("."): if path.startswith("."):
return True return True
def main (args):
p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)") def main(args):
p = ArgumentParser(
"Check for pads that have changed since last sync (according to .meta.json)"
)
# p.add_argument("padid", nargs="*", default=[]) # p.add_argument("padid", nargs="*", default=[])
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json") p.add_argument(
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)") "--padinfo",
p.add_argument("--pub", default=".", help="folder to store files for public pads, default: pub") default=".etherpump/settings.json",
p.add_argument("--group", default="g", help="folder to store files for group pads, default: g") help="settings, default: .etherdump/settings.json",
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None") )
p.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False") p.add_argument(
p.add_argument("--text", default=False, action="store_true", help="download text to PADID.txt, default: False") "--zerorevs",
p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False") default=False,
p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.dhtml, default: False") action="store_true",
p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False") help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)",
)
p.add_argument(
"--pub",
default=".",
help="folder to store files for public pads, default: pub",
)
p.add_argument(
"--group",
default="g",
help="folder to store files for group pads, default: g",
)
p.add_argument(
"--skip",
default=None,
type=int,
help="skip this many items, default: None",
)
p.add_argument(
"--meta",
default=False,
action="store_true",
help="download meta to PADID.meta.json, default: False",
)
p.add_argument(
"--text",
default=False,
action="store_true",
help="download text to PADID.txt, default: False",
)
p.add_argument(
"--html",
default=False,
action="store_true",
help="download html to PADID.html, default: False",
)
p.add_argument(
"--dhtml",
default=False,
action="store_true",
help="download dhtml to PADID.dhtml, default: False",
)
p.add_argument(
"--all",
default=False,
action="store_true",
help="download all files (meta, text, html, dhtml), default: False",
)
args = p.parse_args(args) args = p.parse_args(args)
info = loadpadinfo(args.padinfo) info = loadpadinfo(args.padinfo)
@ -81,7 +138,9 @@ def main (args):
padsbypath = {} padsbypath = {}
# listAllPads # listAllPads
padids = getjson(info['apiurl']+'listAllPads?'+urlencode(data))['data']['padIDs'] padids = getjson(info['apiurl'] + 'listAllPads?' + urlencode(data))['data'][
'padIDs'
]
padids.sort() padids.sort()
for padid in padids: for padid in padids:
pad = PadItem(padid=padid, padexists=True) pad = PadItem(padid=padid, padexists=True)
@ -104,9 +163,9 @@ def main (args):
if p.status != curstat: if p.status != curstat:
curstat = p.status curstat = p.status
if curstat == "F": if curstat == "F":
print ("New/changed files") print("New/changed files")
elif curstat == "P": elif curstat == "P":
print ("New/changed pads") print("New/changed pads")
elif curstat == ".": elif curstat == ".":
print ("Up to date") print("Up to date")
print (" ", p.status, p.padid) print(" ", p.status, p.padid)

12
pyproject.toml Normal file
View File

@ -0,0 +1,12 @@
[build-system]
requires = [
"setuptools>=41.0.0",
"setuptools-scm",
"wheel",
]
build-backend = "setuptools.build_meta"
[tool.black]
line-length = 80
target-version = ['py35', 'py36', 'py37']
skip-string-normalization = true

9
setup.cfg Normal file
View File

@ -0,0 +1,9 @@
[flake8]
max-line-length = 80
[isort]
known_first_party = etherpump
line_length = 80
multi_line_output = 3
include_trailing_comma = True
skip = .venv

View File

@ -1,8 +1,9 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from etherpump import VERSION
from setuptools import find_packages, setup from setuptools import find_packages, setup
from etherpump import VERSION
with open('README.md', 'r') as handle: with open('README.md', 'r') as handle:
long_description = handle.read() long_description = handle.read()