Add maintenance tools and run them

This commit is contained in:
Luke Murphy 2019-09-27 23:14:30 +02:00
parent 159165d2d5
commit 8f18594833
No known key found for this signature in database
GPG Key ID: 5E2EF5A63E3718CC
27 changed files with 1253 additions and 465 deletions

View File

@ -1,4 +1,13 @@
# Directories containing the Python sources handled by the maintenance targets.
SOURCE_DIRS := bin/ etherpump/

# None of these targets produces a file of the same name. Declaring them phony
# keeps them runnable even if a file called "publish", "format" or "lint"
# ever appears in the working tree.
.PHONY: publish format lint

# Build a fresh wheel from scratch and upload it with twine.
publish:
	@rm -rf dist
	@python setup.py bdist_wheel
	@twine upload dist/*

# Rewrite the sources in place: black first, then isort for import ordering.
format:
	@black $(SOURCE_DIRS)
	@isort -rc $(SOURCE_DIRS)

# Report style problems with flake8.
lint:
	@flake8 $(SOURCE_DIRS)

View File

@ -126,6 +126,23 @@ Publishing
You should have a [PyPI](https://pypi.org/) account and be added as an owner/maintainer on the [etherpump package](https://pypi.org/project/etherpump/).
Maintenance utilities
---------------------
Tools to help things stay tidy over time.
```bash
$ pip install flake8 isort black
$ make format
$ make lint
```
Please see the following links for further reading:
* https://flake8.pycqa.org
* https://isort.readthedocs.io
* https://black.readthedocs.io
License
=======

View File

@ -1,8 +1,9 @@
#!/usr/bin/env python3
from etherpump import VERSION
import sys
from etherpump import VERSION
usage = """Usage:
etherpump CMD
@ -43,7 +44,9 @@ except IndexError:
sys.exit(0)
try:
# http://stackoverflow.com/questions/301134/dynamic-module-import-in-python
cmdmod = __import__("etherpump.commands.%s" % cmd, fromlist=["etherdump.commands"])
cmdmod = __import__(
"etherpump.commands.%s" % cmd, fromlist=["etherdump.commands"]
)
cmdmod.main(args)
except ImportError as e:
print("Error performing command '{0}'\n(python said: {1})\n".format(cmd, e))

View File

@ -1,4 +1,4 @@
import os
DATAPATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data")
VERSION = '0.0.2'
VERSION = '0.0.2'

View File

@ -1,8 +1,10 @@
#!/usr/bin/env python
import json
import os
from argparse import ArgumentParser
import json, os
def main(args):
p = ArgumentParser("")
@ -18,6 +20,6 @@ def main(args):
ret.append(meta)
if args.indent:
print (json.dumps(ret, indent=args.indent))
print(json.dumps(ret, indent=args.indent))
else:
print (json.dumps(ret))
print(json.dumps(ret))

View File

@ -1,24 +1,35 @@
import re, os, json, sys
import json
import os
import re
import sys
from html.entities import name2codepoint
from math import ceil, floor
from time import sleep
from urllib.parse import urlparse, urlunparse, urlencode, quote_plus, unquote_plus
from urllib.request import urlopen, URLError, HTTPError
from html.entities import name2codepoint
from urllib.parse import (
quote_plus,
unquote_plus,
urlencode,
urlparse,
urlunparse,
)
from urllib.request import HTTPError, URLError, urlopen
# Matches the "g.<group>$" prefix at the start of a pad id.
groupnamepat = re.compile(r"^g\.(\w+)\$")


def splitpadname(padid):
    """Split a pad id into a ``(group, name)`` tuple.

    If ``padid`` starts with ``g.<group>$``, return the group (without the
    ``g.`` and ``$`` delimiters) and the rest of the id. Otherwise return an
    empty group and ``padid`` unchanged.
    """
    m = groupnamepat.match(padid)
    if m:
        return (m.group(1), padid[m.end():])
    return ("", padid)
def padurl(padid):
    """Return the URL form of ``padid``; currently the pad id itself."""
    return padid
def padpath (padid, pub_path="", group_path="", normalize=False):
def padpath(padid, pub_path="", group_path="", normalize=False):
g, p = splitpadname(padid)
p = quote_plus(p)
if normalize:
@ -32,7 +43,8 @@ def padpath (padid, pub_path="", group_path="", normalize=False):
else:
return os.path.join(pub_path, p)
def padpath2id (path):
def padpath2id(path):
if type(path) == str:
path = path.encode("utf-8")
dd, p = os.path.split(path)
@ -43,7 +55,8 @@ def padpath2id (path):
else:
return p.decode("utf-8")
def getjson (url, max_retry=3, retry_sleep_time=3):
def getjson(url, max_retry=3, retry_sleep_time=3):
ret = {}
ret["_retries"] = 0
while ret["_retries"] <= max_retry:
@ -61,13 +74,14 @@ def getjson (url, max_retry=3, retry_sleep_time=3):
except ValueError as e:
url = "http://localhost" + url
except HTTPError as e:
print ("HTTPError {0}".format(e), file=sys.stderr)
print("HTTPError {0}".format(e), file=sys.stderr)
ret["_code"] = e.code
ret["_retries"]+=1
ret["_retries"] += 1
if retry_sleep_time:
sleep(retry_sleep_time)
return ret
def loadpadinfo(p):
with open(p) as f:
info = json.load(f)
@ -75,17 +89,17 @@ def loadpadinfo(p):
info['localapiurl'] = info.get('apiurl')
return info
def progressbar(i, num, label="", file=sys.stderr):
    """Write a 20-character text progress bar to ``file``.

    i     -- zero-based index of the item being processed
    num   -- total number of items
    label -- text shown after the ``i+1/num`` counter
    file  -- stream to write to (default: stderr)
    """
    # With no items there is no meaningful ratio; draw an empty bar instead
    # of dividing by zero.
    p = float(i) / num if num else 0.0
    bars = int(ceil(p * 20))
    bar = ("*" * bars) + ("-" * (20 - bars))
    # The leading "\r" moves to column 0 so successive calls overwrite
    # each other on a terminal.
    msg = "\r{0} {1}/{2} {3}... ".format(bar, (i + 1), num, label)
    # Honour the caller-supplied stream rather than always using sys.stderr.
    file.write(msg)
    file.flush()
# Python developer Fredrik Lundh (author of elementtree, among other things) has such a function on his website, which works with decimal, hex and named entities:
##
# Removes HTML or XML character references and entities from a text string.
@ -110,5 +124,6 @@ def unescape(text):
text = chr(name2codepoint[text[1:-1]])
except KeyError:
pass
return text # leave as is
return text # leave as is
return re.sub("&#?\w+;", fixup, text)

View File

@ -1,18 +1,29 @@
from argparse import ArgumentParser
import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args):
p = ArgumentParser("calls the createDiffHTML API function for the given padid")
p = ArgumentParser(
"calls the createDiffHTML API function for the given padid"
)
p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
p.add_argument(
"--format",
default="text",
help="output format, can be: text, json; default: text",
)
p.add_argument(
"--rev", type=int, default=None, help="revision, default: latest"
)
args = p.parse_args(args)
with open(args.padinfo) as f:
@ -25,15 +36,15 @@ def main(args):
data['startRev'] = "0"
if args.rev != None:
data['rev'] = args.rev
requesturl = apiurl+'createDiffHTML?'+urlencode(data)
requesturl = apiurl + 'createDiffHTML?' + urlencode(data)
if args.showurl:
print (requesturl)
print(requesturl)
else:
try:
results = json.load(urlopen(requesturl))['data']
if args.format == "json":
print (json.dumps(results))
print(json.dumps(results))
else:
print (results['html'].encode("utf-8"))
print(results['html'].encode("utf-8"))
except HTTPError as e:
pass
pass

View File

@ -1,17 +1,24 @@
from argparse import ArgumentParser
import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args):
p = ArgumentParser("calls the getText API function for the given padid")
p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument(
"--format",
default="text",
help="output format, can be: text, json; default: text",
)
args = p.parse_args(args)
with open(args.padinfo) as f:
@ -20,14 +27,14 @@ def main(args):
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
data = {}
data['apikey'] = info['apikey']
data['padID'] = args.padid # is utf-8 encoded
requesturl = apiurl+'deletePad?'+urlencode(data)
data['padID'] = args.padid # is utf-8 encoded
requesturl = apiurl + 'deletePad?' + urlencode(data)
if args.showurl:
print (requesturl)
print(requesturl)
else:
results = json.load(urlopen(requesturl))
if args.format == "json":
print (json.dumps(results))
print(json.dumps(results))
else:
if results['data']:
print (results['data']['text'].encode("utf-8"))
print(results['data']['text'].encode("utf-8"))

View File

@ -1,12 +1,13 @@
import json
import re
import sys
from argparse import ArgumentParser
import sys, json, re
from csv import writer
from datetime import datetime
from math import ceil, floor
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
from csv import writer
from math import ceil, floor
"""
Dumps a CSV of all pads with columns
@ -23,16 +24,27 @@ groupnamepat = re.compile(r"^g\.(\w+)\$")
out = writer(sys.stdout)
def jsonload(url):
    """Open ``url`` and return its body parsed as JSON.

    The response is closed even if reading it raises.
    """
    with urlopen(url) as f:
        data = f.read()
    return json.loads(data)
def main (args):
def main(args):
p = ArgumentParser("outputs a CSV of information all all pads")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument(
"--zerorevs",
default=False,
action="store_true",
help="include pads with zero revisions, default: False",
)
args = p.parse_args(args)
with open(args.padinfo) as f:
@ -40,7 +52,7 @@ def main (args):
apiurl = info.get("apiurl")
data = {}
data['apikey'] = info['apikey']
requesturl = apiurl+'listAllPads?'+urlencode(data)
requesturl = apiurl + 'listAllPads?' + urlencode(data)
padids = jsonload(requesturl)['data']['padIDs']
padids.sort()
@ -49,36 +61,50 @@ def main (args):
count = 0
out.writerow(("padid", "groupid", "lastedited", "revisions", "author_ids"))
for i, padid in enumerate(padids):
p = (float(i) / numpads)
percentage = int(floor(p*100))
bars = int(ceil(p*20))
bar = ("*"*bars) + ("-"*(20-bars))
msg = "\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid)
p = float(i) / numpads
percentage = int(floor(p * 100))
bars = int(ceil(p * 20))
bar = ("*" * bars) + ("-" * (20 - bars))
msg = "\r{0} {1}/{2} {3}... ".format(bar, (i + 1), numpads, padid)
if len(msg) > maxmsglen:
maxmsglen = len(msg)
sys.stderr.write("\r{0}".format(" "*maxmsglen))
sys.stderr.write("\r{0}".format(" " * maxmsglen))
sys.stderr.write(msg.encode("utf-8"))
sys.stderr.flush()
m = groupnamepat.match(padid)
if m:
groupname = m.group(1)
padidnogroup = padid[m.end():]
padidnogroup = padid[m.end() :]
else:
groupname = ""
padidnogroup = padid
data['padID'] = padid.encode("utf-8")
revisions = jsonload(apiurl+'getRevisionsCount?'+urlencode(data))['data']['revisions']
revisions = jsonload(apiurl + 'getRevisionsCount?' + urlencode(data))[
'data'
]['revisions']
if (revisions == 0) and not args.zerorevs:
continue
lastedited_raw = jsonload(apiurl+'getLastEdited?'+urlencode(data))['data']['lastEdited']
lastedited_iso = datetime.fromtimestamp(int(lastedited_raw)/1000).isoformat()
author_ids = jsonload(apiurl+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
lastedited_raw = jsonload(apiurl + 'getLastEdited?' + urlencode(data))[
'data'
]['lastEdited']
lastedited_iso = datetime.fromtimestamp(
int(lastedited_raw) / 1000
).isoformat()
author_ids = jsonload(apiurl + 'listAuthorsOfPad?' + urlencode(data))[
'data'
]['authorIDs']
author_ids = " ".join(author_ids).encode("utf-8")
out.writerow((padidnogroup.encode("utf-8"), groupname.encode("utf-8"), revisions, lastedited_iso, author_ids))
out.writerow(
(
padidnogroup.encode("utf-8"),
groupname.encode("utf-8"),
revisions,
lastedited_iso,
author_ids,
)
)
count += 1
print("\nWrote {0} rows...".format(count), file=sys.stderr)

View File

@ -1,18 +1,27 @@
from argparse import ArgumentParser
import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args):
p = ArgumentParser("calls the getHTML API function for the given padid")
p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
p.add_argument(
"--format",
default="text",
help="output format, can be: text, json; default: text",
)
p.add_argument(
"--rev", type=int, default=None, help="revision, default: latest"
)
args = p.parse_args(args)
with open(args.padinfo) as f:
@ -24,12 +33,12 @@ def main(args):
data['padID'] = args.padid
if args.rev != None:
data['rev'] = args.rev
requesturl = apiurl+'getHTML?'+urlencode(data)
requesturl = apiurl + 'getHTML?' + urlencode(data)
if args.showurl:
print (requesturl)
print(requesturl)
else:
results = json.load(urlopen(requesturl))['data']
if args.format == "json":
print (json.dumps(results))
print(json.dumps(results))
else:
print (results['html'].encode("utf-8"))
print(results['html'].encode("utf-8"))

View File

@ -1,17 +1,27 @@
import json
import sys
from argparse import ArgumentParser
import json, sys
from urllib.parse import urlencode
from urllib.request import urlopen, URLError, HTTPError
from urllib.request import HTTPError, URLError, urlopen
def main(args):
p = ArgumentParser("calls the getText API function for the given padid")
p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
p.add_argument(
"--format",
default="text",
help="output format, can be: text, json; default: text",
)
p.add_argument(
"--rev", type=int, default=None, help="revision, default: latest"
)
args = p.parse_args(args)
with open(args.padinfo) as f:
@ -20,18 +30,18 @@ def main(args):
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
data = {}
data['apikey'] = info['apikey']
data['padID'] = args.padid # is utf-8 encoded
data['padID'] = args.padid # is utf-8 encoded
if args.rev != None:
data['rev'] = args.rev
requesturl = apiurl+'getText?'+urlencode(data)
requesturl = apiurl + 'getText?' + urlencode(data)
if args.showurl:
print (requesturl)
print(requesturl)
else:
resp = urlopen(requesturl).read()
resp = resp.decode("utf-8")
results = json.loads(resp)
if args.format == "json":
print (json.dumps(results))
print(json.dumps(results))
else:
if results['data']:
sys.stdout.write(results['data']['text'])

View File

@ -1,28 +1,31 @@
#!/usr/bin/env python3
from html5lib import parse
import os, sys
import os
import sys
from argparse import ArgumentParser
from xml.etree import ElementTree as ET
from xml.etree import ElementTree as ET
from html5lib import parse
def etree_indent(elem, level=0):
    """Add newline/indent whitespace to ``elem`` and its subtree, in place.

    Only ``text``/``tail`` values that are empty or whitespace-only are
    overwritten, so existing character data is left alone.

    elem  -- the element to indent
    level -- nesting depth of ``elem`` (0 for the root)
    """
    i = "\n" + level * " "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = i + " "
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        child = None
        for child in elem:
            etree_indent(child, level + 1)
        # The last child's tail sits before the parent's closing tag, so it
        # is pulled back to the parent's indentation level.
        if not child.tail or not child.tail.strip():
            child.tail = i
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i
def get_link_type (url):
def get_link_type(url):
lurl = url.lower()
if lurl.endswith(".html") or lurl.endswith(".htm"):
return "text/html"
@ -37,13 +40,17 @@ def get_link_type (url):
elif lurl.endswith(".js") or lurl.endswith(".jsonp"):
return "text/javascript"
def pluralize(x):
    """Return ``x`` as-is if it is a list or tuple, else wrap it in a 1-tuple.

    Subclasses of list/tuple count as sequences too, so they are returned
    unchanged rather than wrapped.
    """
    if isinstance(x, (list, tuple)):
        return x
    return (x,)
def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, indent=False):
def html5tidy(
doc, charset="utf-8", title=None, scripts=None, links=None, indent=False
):
if scripts:
script_srcs = [x.attrib.get("src") for x in doc.findall(".//script")]
for src in pluralize(scripts):
@ -56,21 +63,30 @@ def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, inden
for elt in doc.findall(".//link"):
href = elt.attrib.get("href")
if href:
existinglinks[href] = elt
existinglinks[href] = elt
for link in links:
linktype = link.get("type") or get_link_type(link["href"])
if link["href"] in existinglinks:
elt = existinglinks[link["href"]]
elt.attrib["rel"] = link["rel"]
else:
elt = ET.SubElement(doc.find(".//head"), "link", href=link["href"], rel=link["rel"])
elt = ET.SubElement(
doc.find(".//head"),
"link",
href=link["href"],
rel=link["rel"],
)
if linktype:
elt.attrib["type"] = linktype
elt.attrib["type"] = linktype
if "title" in link:
elt.attrib["title"] = link["title"]
if charset:
meta_charsets = [x.attrib.get("charset") for x in doc.findall(".//meta") if x.attrib.get("charset") != None]
meta_charsets = [
x.attrib.get("charset")
for x in doc.findall(".//meta")
if x.attrib.get("charset") != None
]
if not meta_charsets:
meta = ET.SubElement(doc.find(".//head"), "meta", charset=charset)
@ -79,33 +95,89 @@ def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, inden
if not titleelt:
titleelt = ET.SubElement(doc.find(".//head"), "title")
titleelt.text = title
if indent:
etree_indent(doc)
return doc
def main (args):
def main(args):
p = ArgumentParser("")
p.add_argument("input", nargs="?", default=None)
p.add_argument("--indent", default=False, action="store_true")
p.add_argument("--mogrify", default=False, action="store_true", help="modify file in place")
p.add_argument("--method", default="html", help="method, default: html, values: html, xml, text")
p.add_argument(
"--mogrify",
default=False,
action="store_true",
help="modify file in place",
)
p.add_argument(
"--method",
default="html",
help="method, default: html, values: html, xml, text",
)
p.add_argument("--output", default=None, help="")
p.add_argument("--title", default=None, help="ensure/add title tag in head")
p.add_argument("--charset", default="utf-8", help="ensure/add meta tag with charset")
p.add_argument("--script", action="append", default=[], help="ensure/add script tag")
p.add_argument(
"--charset", default="utf-8", help="ensure/add meta tag with charset"
)
p.add_argument(
"--script", action="append", default=[], help="ensure/add script tag"
)
# <link>s, see https://www.w3.org/TR/html5/links.html#links
p.add_argument("--stylesheet", action="append", default=[], help="ensure/add style link")
p.add_argument("--alternate", action="append", default=[], nargs="+", help="ensure/add alternate links (optionally followed by a title and type)")
p.add_argument("--next", action="append", default=[], nargs="+", help="ensure/add alternate link")
p.add_argument("--prev", action="append", default=[], nargs="+", help="ensure/add alternate link")
p.add_argument("--search", action="append", default=[], nargs="+", help="ensure/add search link")
p.add_argument("--rss", action="append", default=[], nargs="+", help="ensure/add alternate link of type application/rss+xml")
p.add_argument("--atom", action="append", default=[], nargs="+", help="ensure/add alternate link of type application/atom+xml")
p.add_argument(
"--stylesheet",
action="append",
default=[],
help="ensure/add style link",
)
p.add_argument(
"--alternate",
action="append",
default=[],
nargs="+",
help="ensure/add alternate links (optionally followed by a title and type)",
)
p.add_argument(
"--next",
action="append",
default=[],
nargs="+",
help="ensure/add alternate link",
)
p.add_argument(
"--prev",
action="append",
default=[],
nargs="+",
help="ensure/add alternate link",
)
p.add_argument(
"--search",
action="append",
default=[],
nargs="+",
help="ensure/add search link",
)
p.add_argument(
"--rss",
action="append",
default=[],
nargs="+",
help="ensure/add alternate link of type application/rss+xml",
)
p.add_argument(
"--atom",
action="append",
default=[],
nargs="+",
help="ensure/add alternate link of type application/atom+xml",
)
args = p.parse_args(args)
links = []
def add_links (links, items, rel, _type=None):
def add_links(links, items, rel, _type=None):
for href in items:
d = {}
d["rel"] = rel
@ -128,6 +200,7 @@ def main (args):
d["href"] = href
links.append(d)
for rel in ("stylesheet", "alternate", "next", "prev", "search"):
add_links(links, getattr(args, rel), rel)
for item in args.rss:
@ -144,27 +217,33 @@ def main (args):
doc = parse(fin, treebuilder="etree", namespaceHTMLElements=False)
if fin != sys.stdin:
fin.close()
html5tidy(doc, scripts=args.script, links=links, title=args.title, indent=args.indent)
html5tidy(
doc,
scripts=args.script,
links=links,
title=args.title,
indent=args.indent,
)
# OUTPUT
tmppath = None
if args.output:
fout = open(args.output, "w")
elif args.mogrify:
tmppath = args.input+".tmp"
tmppath = args.input + ".tmp"
fout = open(tmppath, "w")
else:
fout = sys.stdout
print (ET.tostring(doc, method=args.method, encoding="unicode"), file=fout)
print(ET.tostring(doc, method=args.method, encoding="unicode"), file=fout)
if fout != sys.stdout:
fout.close()
if tmppath:
os.rename(args.input, args.input+"~")
os.rename(args.input, args.input + "~")
os.rename(tmppath, args.input)
if __name__ == "__main__":
if __name__ == "__main__":
main(sys.argv)

View File

@ -1,16 +1,19 @@
import json
import os
import re
import sys
import time
from argparse import ArgumentParser
import sys, json, re, os, time
from datetime import datetime
import dateutil.parser
from urllib.parse import urlparse, urlunparse, urlencode, quote
from urllib.request import urlopen, URLError, HTTPError
from jinja2 import FileSystemLoader, Environment
from etherpump.commands.common import *
from time import sleep
from urllib.parse import quote, urlencode, urlparse, urlunparse
from urllib.request import HTTPError, URLError, urlopen
from jinja2 import Environment, FileSystemLoader
import dateutil.parser
from etherpump.commands.common import *
"""
index:
@ -20,7 +23,8 @@ index:
"""
def group (items, key=lambda x: x):
def group(items, key=lambda x: x):
""" returns a list of lists, of items grouped by a key function """
ret = []
keys = {}
@ -34,10 +38,12 @@ def group (items, key=lambda x: x):
ret.append(keys[k])
return ret
# def base (x):
# return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
def splitextlong (x):
def splitextlong(x):
""" split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz') """
m = re.search(r"^(.*?)(\..*)$", x)
if m:
@ -45,20 +51,24 @@ def splitextlong (x):
else:
return x, ''
def base(x):
    """Return the first element of ``splitextlong(x)``.

    That is ``x`` without its "long" extension.
    """
    return splitextlong(x)[0]
def excerpt(t, chars=25):
    """Return ``t`` cut to ``chars`` characters with "..." appended.

    Values no longer than ``chars`` are returned unchanged.
    """
    if len(t) > chars:
        t = t[:chars] + "..."
    return t
def absurl(url, base=None):
    """Return ``url`` made absolute against ``base``.

    URLs that already start with "http" are returned unchanged. With no
    ``base`` (the default) there is nothing to prepend, so ``url`` is
    returned as given instead of failing on ``None + str``.
    """
    if url.startswith("http") or not base:
        return url
    return base + url
def url_base (url):
def url_base(url):
(scheme, netloc, path, params, query, fragment) = urlparse(url)
path, _ = os.path.split(path.lstrip("/"))
ret = urlunparse((scheme, netloc, path, None, None, None))
@ -66,45 +76,131 @@ def url_base (url):
ret += "/"
return ret
def datetimeformat(t, format='%Y-%m-%d %H:%M:%S'):
    """Format a timestamp with ``strftime`` pattern ``format``.

    t      -- a date string (parsed with ``dateutil.parser.parse``), or any
              other value, which is passed to ``time.localtime``
    format -- strftime pattern for the result
    """
    # isinstance also accepts str subclasses, which the exact-type
    # comparison sent down the time.localtime branch.
    if isinstance(t, str):
        dt = dateutil.parser.parse(t)
        return dt.strftime(format)
    return time.strftime(format, time.localtime(t))
def main (args):
def main(args):
p = ArgumentParser("Convert dumped files to a document via a template.")
p.add_argument("input", nargs="+", help="Files to list (.meta.json files)")
p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in")
p.add_argument("--template", default="index.html", help="template name, built-ins include index.html, rss.xml; default: index.html")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: ./.etherdump/settings.json")
p.add_argument(
"--templatepath",
default=None,
help="path to find templates, default: built-in",
)
p.add_argument(
"--template",
default="index.html",
help="template name, built-ins include index.html, rss.xml; default: index.html",
)
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: ./.etherdump/settings.json",
)
# p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
p.add_argument("--order", default="padid", help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid")
p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)")
p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)")
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
p.add_argument(
"--order",
default="padid",
help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid",
)
p.add_argument(
"--reverse",
default=False,
action="store_true",
help="reverse order, default: False (reverse chrono)",
)
p.add_argument(
"--limit",
type=int,
default=0,
help="limit to number of items, default: 0 (no limit)",
)
p.add_argument(
"--skip",
default=None,
type=int,
help="skip this many items, default: None",
)
p.add_argument("--content", default=False, action="store_true", help="rss: include (full) content tag, default: False")
p.add_argument("--link", default="diffhtml,html,text", help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text")
p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
p.add_argument(
"--content",
default=False,
action="store_true",
help="rss: include (full) content tag, default: False",
)
p.add_argument(
"--link",
default="diffhtml,html,text",
help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text",
)
p.add_argument(
"--linkbase",
default=None,
help="base url to use for links, default: try to use the feedurl",
)
p.add_argument("--output", default=None, help="output, default: stdout")
p.add_argument("--files", default=False, action="store_true", help="include files (experimental)")
p.add_argument(
"--files",
default=False,
action="store_true",
help="include files (experimental)",
)
pg = p.add_argument_group('template variables')
pg.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml")
pg.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url")
pg.add_argument("--title", default="etherpump", help="title for document or rss feed channel title, default: etherdump")
pg.add_argument("--description", default="", help="rss: channel description, default: empty")
pg.add_argument("--language", default="en-US", help="rss: feed language, default: en-US")
pg.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
pg.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1")
pg.add_argument("--generator", default="https://gitlab.com/activearchives/etherpump", help="generator, default: https://gitlab.com/activearchives/etherdump")
pg.add_argument("--timestamp", default=None, help="timestamp, default: now (e.g. 2015-12-01 12:30:00)")
pg.add_argument(
"--feedurl",
default="feed.xml",
help="rss: to use as feeds own (self) link, default: feed.xml",
)
pg.add_argument(
"--siteurl",
default=None,
help="rss: to use as channel's site link, default: the etherpad url",
)
pg.add_argument(
"--title",
default="etherpump",
help="title for document or rss feed channel title, default: etherdump",
)
pg.add_argument(
"--description",
default="",
help="rss: channel description, default: empty",
)
pg.add_argument(
"--language", default="en-US", help="rss: feed language, default: en-US"
)
pg.add_argument(
"--updatePeriod",
default="daily",
help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily",
)
pg.add_argument(
"--updateFrequency",
default=1,
type=int,
help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1",
)
pg.add_argument(
"--generator",
default="https://gitlab.com/activearchives/etherpump",
help="generator, default: https://gitlab.com/activearchives/etherdump",
)
pg.add_argument(
"--timestamp",
default=None,
help="timestamp, default: now (e.g. 2015-12-01 12:30:00)",
)
pg.add_argument("--next", default=None, help="next link, default: None)")
pg.add_argument("--prev", default=None, help="prev link, default: None")
@ -129,17 +225,12 @@ def main (args):
# Use "base" to strip (longest) extensions
# inputs = group(inputs, base)
def wrappath(p):
    """Build a version record for the file path ``p``.

    The record uses "./<p>" as both "url" and "path", a fixed "code" of
    200, and the file extension (without the leading dot) as "type".
    """
    path = "./{0}".format(p)
    ext = os.path.splitext(p)[1][1:]
    return {"url": path, "path": path, "code": 200, "type": ext}
def metaforpaths (paths):
def metaforpaths(paths):
ret = {}
pid = base(paths[0])
ret['pad'] = ret['padid'] = pid
@ -149,7 +240,9 @@ def main (args):
mtime = os.stat(p).st_mtime
if lastedited == None or mtime > lastedited:
lastedited = mtime
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime("%Y-%m-%dT%H:%M:%S")
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime(
"%Y-%m-%dT%H:%M:%S"
)
ret["lastedited_raw"] = mtime
return ret
@ -169,7 +262,7 @@ def main (args):
# else:
# return metaforpaths(paths)
def fixdates (padmeta):
def fixdates(padmeta):
d = dateutil.parser.parse(padmeta["lastedited_iso"])
padmeta["lastedited"] = d
padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000")
@ -180,17 +273,21 @@ def main (args):
pads = list(map(fixdates, pads))
args.pads = list(pads)
def could_have_base(x, y):
    """Return True if ``x`` equals ``y`` or is ``y`` followed by a "." suffix."""
    return x == y or (x.startswith(y) and x[len(y):].startswith("."))
def get_best_pad(x):
    """Return the pad for the first entry of ``padbases`` matching ``x``.

    A base matches when ``could_have_base(x, base)`` is true. Returns None
    when no base matches. ``padbases`` and ``pads_by_base`` come from the
    enclosing scope.
    """
    for pb in padbases:
        if could_have_base(x, pb):
            return pads_by_base[pb]
    return None
def has_version(padinfo, path):
    """Return the entries of ``padinfo['versions']`` whose 'path' is "./<path>".

    The result is a list, empty (falsy) when no version has that path.
    Entries without a 'path' key are ignored.
    """
    target = "./" + path
    return [
        x
        for x in padinfo['versions']
        if 'path' in x and x['path'] == target
    ]
if args.files:
inputs = args.input
@ -208,25 +305,33 @@ def main (args):
# print ("PADBASES", file=sys.stderr)
# for pb in padbases:
# print (" ", pb, file=sys.stderr)
print ("pairing input files with pads", file=sys.stderr)
print("pairing input files with pads", file=sys.stderr)
for x in inputs:
# pair input with a pad if possible
xbasename = os.path.basename(x)
p = get_best_pad(xbasename)
if p:
if not has_version(p, x):
print ("Grouping file {0} with pad {1}".format(x, p['padid']), file=sys.stderr)
print(
"Grouping file {0} with pad {1}".format(x, p['padid']),
file=sys.stderr,
)
p['versions'].append(wrappath(x))
else:
print ("Skipping existing version {0} ({1})...".format(x, p['padid']), file=sys.stderr)
print(
"Skipping existing version {0} ({1})...".format(
x, p['padid']
),
file=sys.stderr,
)
removelist.append(x)
# Removed Matches files
for x in removelist:
inputs.remove(x)
print ("Remaining files:", file=sys.stderr)
print("Remaining files:", file=sys.stderr)
for x in inputs:
print (x, file=sys.stderr)
print (file=sys.stderr)
print(x, file=sys.stderr)
print(file=sys.stderr)
# Add "fake" pads for remaining files
for x in inputs:
args.pads.append(metaforpaths([x]))
@ -242,7 +347,9 @@ def main (args):
# order items & apply limit
if args.order == "lastedited":
args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=args.reverse)
args.pads.sort(
key=lambda x: x.get("lastedited_iso"), reverse=args.reverse
)
elif args.order == "pad":
args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse)
elif args.order == "padid":
@ -250,12 +357,14 @@ def main (args):
elif args.order == "revisions":
args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse)
elif args.order == "authors":
args.pads.sort(key=lambda x: len(x.get("authors")), reverse=args.reverse)
args.pads.sort(
key=lambda x: len(x.get("authors")), reverse=args.reverse
)
else:
raise Exception("That ordering is not implemented!")
if args.limit:
args.pads = args.pads[:args.limit]
args.pads = args.pads[: args.limit]
# add versions_by_type, add in full text
# add link (based on args.link)
@ -272,7 +381,7 @@ def main (args):
if "text" in versions_by_type:
try:
with open (versions_by_type["text"]["path"]) as f:
with open(versions_by_type["text"]["path"]) as f:
p["text"] = f.read()
except FileNotFoundError:
p['text'] = ''
@ -289,6 +398,6 @@ def main (args):
if args.output:
with open(args.output, "w") as f:
print (template.render(vars(args)), file=f)
print(template.render(vars(args)), file=f)
else:
print (template.render(vars(args)))
print(template.render(vars(args)))

View File

@ -1,19 +1,19 @@
import json
import os
import sys
from argparse import ArgumentParser
from urllib.parse import urlencode, urlparse, urlunparse
from urllib.request import HTTPError, URLError, urlopen
from urllib.parse import urlparse, urlunparse, urlencode
from urllib.request import urlopen, URLError, HTTPError
import json, os, sys
def get_api(url, cmd=None, data=None, verbose=False):
try:
useurl = url+cmd
useurl = url + cmd
if data:
useurl += "?"+urlencode(data)
useurl += "?" + urlencode(data)
# data['apikey'] = "<APIKEY>"  # (redacted: never commit a real API key, even in a comment)
if verbose:
print ("trying", useurl, file=sys.stderr)
print("trying", useurl, file=sys.stderr)
resp = urlopen(useurl).read()
resp = resp.decode("utf-8")
resp = json.loads(resp)
@ -21,11 +21,11 @@ def get_api(url, cmd=None, data=None, verbose=False):
return resp
except ValueError as e:
if verbose:
print (" ValueError", e, file=sys.stderr)
print(" ValueError", e, file=sys.stderr)
return
except HTTPError as e:
if verbose:
print (" HTTPError", e, file=sys.stderr)
print(" HTTPError", e, file=sys.stderr)
if e.code == 401:
# Unauthorized is how the API responds to an incorrect API key
return {"code": 401, "message": e}
@ -34,7 +34,8 @@ def get_api(url, cmd=None, data=None, verbose=False):
# # print ("returning", resp, file=sys.stderr)
# return resp
def tryapiurl (url, verbose=False):
def tryapiurl(url, verbose=False):
"""
Try to use url as api, correcting if possible.
Returns corrected / normalized URL, or None if not possible
@ -47,22 +48,30 @@ def tryapiurl (url, verbose=False):
params, query, fragment = ("", "", "")
path = path.strip("/")
# 1. try directly...
apiurl = urlunparse((scheme, netloc, path, params, query, fragment))+"/"
apiurl = (
urlunparse((scheme, netloc, path, params, query, fragment)) + "/"
)
if get_api(apiurl, "listAllPads", verbose=verbose):
return apiurl
# 2. try with += api/1.2.9
path = os.path.join(path, "api", "1.2.9")+"/"
path = os.path.join(path, "api", "1.2.9") + "/"
apiurl = urlunparse((scheme, netloc, path, params, query, fragment))
if get_api(apiurl, "listAllPads", verbose=verbose):
return apiurl
# except ValueError as e:
# print ("ValueError", e, file=sys.stderr)
except URLError as e:
print ("URLError", e, file=sys.stderr)
print("URLError", e, file=sys.stderr)
def main(args):
p = ArgumentParser("initialize an etherpump folder")
p.add_argument("arg", nargs="*", default=[], help="optional positional args: path etherpadurl")
p.add_argument(
"arg",
nargs="*",
default=[],
help="optional positional args: path etherpadurl",
)
p.add_argument("--path", default=None, help="path to initialize")
p.add_argument("--padurl", default=None, help="")
p.add_argument("--apikey", default=None, help="")
@ -70,7 +79,6 @@ def main(args):
p.add_argument("--reinit", default=False, action="store_true", help="")
args = p.parse_args(args)
path = args.path
if path == None and len(args.arg):
path = args.arg[0]
@ -89,7 +97,9 @@ def main(args):
with open(padinfopath) as f:
padinfo = json.load(f)
if not args.reinit:
print ("Folder is already initialized. Use --reinit to reset settings.")
print(
"Folder is already initialized. Use --reinit to reset settings."
)
sys.exit(0)
except IOError:
pass
@ -100,7 +110,7 @@ def main(args):
apiurl = args.padurl
while True:
if apiurl:
apiurl = tryapiurl(apiurl,verbose=args.verbose)
apiurl = tryapiurl(apiurl, verbose=args.verbose)
if apiurl:
# print ("Got APIURL: {0}".format(apiurl))
break
@ -109,13 +119,18 @@ def main(args):
apikey = args.apikey
while True:
if apikey:
resp = get_api(apiurl, "listAllPads", {"apikey": apikey}, verbose=args.verbose)
resp = get_api(
apiurl, "listAllPads", {"apikey": apikey}, verbose=args.verbose
)
if resp and resp["code"] == 0:
# print ("GOOD")
break
else:
print ("bad")
print ("The APIKEY is the contents of the file APIKEY.txt in the etherpad folder", file=sys.stderr)
print("bad")
print(
"The APIKEY is the contents of the file APIKEY.txt in the etherpad folder",
file=sys.stderr,
)
apikey = input("Please paste the APIKEY: ").strip()
padinfo["apikey"] = apikey

View File

@ -1,11 +1,13 @@
import json
import os
import re
from argparse import ArgumentParser
import json, os, re
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def group (items, key=lambda x: x):
def group(items, key=lambda x: x):
ret = []
keys = {}
for item in items:
@ -18,6 +20,7 @@ def group (items, key=lambda x: x):
ret.append(keys[k])
return ret
def main(args):
p = ArgumentParser("")
p.add_argument("input", nargs="+", help="filenames")
@ -28,10 +31,11 @@ def main(args):
inputs = [x for x in inputs if not os.path.isdir(x)]
def base(x):
    """Strip one known dump extension from the end of the filename x.

    Recognised extensions are .diff.html, .meta.json, .html and .txt.
    The whole alternation is grouped before the "$" anchor so that an
    extension is only removed when it is the actual suffix; previously the
    anchor applied to ".txt" alone and e.g. ".html" was removed from the
    middle of a name as well.
    """
    return re.sub(r"(\.diff\.html|\.meta\.json|\.html|\.txt)$", "", x)
#from pprint import pprint
#pprint()
# from pprint import pprint
# pprint()
gg = group(inputs, base)
for items in gg:
itembase = base(items[0])
@ -41,5 +45,5 @@ def main(args):
pass
for i in items:
newloc = os.path.join(itembase, i)
print ("'{0}' => '{1}'".format(i, newloc))
print("'{0}' => '{1}'".format(i, newloc))
os.rename(i, newloc)

View File

@ -1,31 +1,40 @@
from argparse import ArgumentParser
import json
import sys
from etherpump.commands.common import getjson
from urllib.parse import urlparse, urlunparse, urlencode
from urllib.request import urlopen, URLError, HTTPError
from argparse import ArgumentParser
from urllib.parse import urlencode, urlparse, urlunparse
from urllib.request import HTTPError, URLError, urlopen
from etherpump.commands.common import getjson


def main(args):
    """Call the etherpad listAllPads API and print the pad ids.

    args is the list of command line arguments (without the command name).
    The settings file named by --padinfo is read as JSON; its "apiurl" and
    "apikey" entries are used to build the request URL. With --showurl the
    URL is printed and no request is made; otherwise the pad ids found under
    data.padIDs of the response are printed, as one JSON list when --format
    is "json" and one id per line for any other value.
    """
    p = ArgumentParser("call listAllPads and print the results")
    p.add_argument(
        "--padinfo",
        default=".etherpump/settings.json",
        # help text now names the real default (it used to say .etherdump)
        help="settings, default: .etherpump/settings.json",
    )
    p.add_argument("--showurl", default=False, action="store_true")
    p.add_argument(
        "--format",
        default="lines",
        help="output format: lines, json; default lines",
    )
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = info.get("apiurl")
    # apiurl = {0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
    data = {}
    data['apikey'] = info['apikey']
    requesturl = apiurl + 'listAllPads?' + urlencode(data)
    if args.showurl:
        print(requesturl)
    else:
        results = getjson(requesturl)['data']['padIDs']
        if args.format == "json":
            print(json.dumps(results))
        else:
            for r in results:
                print(r)

View File

@ -1,17 +1,24 @@
from argparse import ArgumentParser
import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args):
p = ArgumentParser("call listAuthorsOfPad for the padid")
p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="lines", help="output format, can be: lines, json; default: lines")
p.add_argument(
"--format",
default="lines",
help="output format, can be: lines, json; default: lines",
)
args = p.parse_args(args)
with open(args.padinfo) as f:
@ -20,13 +27,13 @@ def main(args):
data = {}
data['apikey'] = info['apikey']
data['padID'] = args.padid.encode("utf-8")
requesturl = apiurl+'listAuthorsOfPad?'+urlencode(data)
requesturl = apiurl + 'listAuthorsOfPad?' + urlencode(data)
if args.showurl:
print (requesturl)
print(requesturl)
else:
results = json.load(urlopen(requesturl))['data']['authorIDs']
if args.format == "json":
print (json.dumps(results))
print(json.dumps(results))
else:
for r in results:
print (r.encode("utf-8"))
print(r.encode("utf-8"))

View File

@ -1,17 +1,20 @@
import json
import os
import re
import sys
import time
from argparse import ArgumentParser
import sys, json, re, os, time
from datetime import datetime
from time import sleep
from urllib.parse import quote, urlencode, urlparse, urlunparse
from urllib.request import HTTPError, URLError, urlopen
from jinja2 import Environment, FileSystemLoader
import dateutil.parser
import pypandoc
from urllib.parse import urlparse, urlunparse, urlencode, quote
from urllib.request import urlopen, URLError, HTTPError
from jinja2 import FileSystemLoader, Environment
from etherpump.commands.common import *
from time import sleep
import dateutil.parser
"""
publication:
@ -21,7 +24,8 @@ publication:
"""
def group (items, key=lambda x: x):
def group(items, key=lambda x: x):
""" returns a list of lists, of items grouped by a key function """
ret = []
keys = {}
@ -35,10 +39,12 @@ def group (items, key=lambda x: x):
ret.append(keys[k])
return ret
# def base (x):
# return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
def splitextlong (x):
def splitextlong(x):
""" split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz') """
m = re.search(r"^(.*?)(\..*)$", x)
if m:
@ -46,20 +52,24 @@ def splitextlong (x):
else:
return x, ''
def base(x):
    """Return the first element of splitextlong(x), i.e. x without its "long" extension."""
    parts = splitextlong(x)
    return parts[0]
def excerpt(t, chars=25):
    """Shorten t to its first chars characters followed by "...".

    Values that are already at most chars long are returned unchanged.
    """
    return t[:chars] + "..." if len(t) > chars else t
def absurl(url, base=None):
    """Make url absolute by prefixing it with base when it is relative.

    A url that already starts with "http" is returned unchanged. A relative
    url is returned as base + url. When no base is given (the default) there
    is nothing to resolve against, so the url is returned as is instead of
    failing with a TypeError on None + url.
    """
    if url.startswith("http") or base is None:
        return url
    return base + url
def url_base (url):
def url_base(url):
(scheme, netloc, path, params, query, fragment) = urlparse(url)
path, _ = os.path.split(path.lstrip("/"))
ret = urlunparse((scheme, netloc, path, None, None, None))
@ -67,45 +77,131 @@ def url_base (url):
ret += "/"
return ret
def datetimeformat(t, format='%Y-%m-%d %H:%M:%S'):
    """Format a timestamp with the strftime pattern given in format.

    t may be a string, which is parsed with dateutil.parser.parse, or any
    value accepted by time.localtime (e.g. seconds since the epoch), which
    is rendered in local time.
    """
    # isinstance (rather than an exact type comparison) also routes str
    # subclasses to the parser instead of handing them to time.localtime
    if isinstance(t, str):
        dt = dateutil.parser.parse(t)
        return dt.strftime(format)
    return time.strftime(format, time.localtime(t))
def main (args):
def main(args):
p = ArgumentParser("Convert dumped files to a document via a template.")
p.add_argument("input", nargs="+", help="Files to list (.meta.json files)")
p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in")
p.add_argument("--template", default="publication.html", help="template name, built-ins include publication.html; default: publication.html")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: ./.etherdump/settings.json")
p.add_argument(
"--templatepath",
default=None,
help="path to find templates, default: built-in",
)
p.add_argument(
"--template",
default="publication.html",
help="template name, built-ins include publication.html; default: publication.html",
)
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: ./.etherdump/settings.json",
)
# p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
p.add_argument("--order", default="padid", help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid")
p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)")
p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)")
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
p.add_argument(
"--order",
default="padid",
help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid",
)
p.add_argument(
"--reverse",
default=False,
action="store_true",
help="reverse order, default: False (reverse chrono)",
)
p.add_argument(
"--limit",
type=int,
default=0,
help="limit to number of items, default: 0 (no limit)",
)
p.add_argument(
"--skip",
default=None,
type=int,
help="skip this many items, default: None",
)
p.add_argument("--content", default=False, action="store_true", help="rss: include (full) content tag, default: False")
p.add_argument("--link", default="diffhtml,html,text", help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text")
p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
p.add_argument(
"--content",
default=False,
action="store_true",
help="rss: include (full) content tag, default: False",
)
p.add_argument(
"--link",
default="diffhtml,html,text",
help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text",
)
p.add_argument(
"--linkbase",
default=None,
help="base url to use for links, default: try to use the feedurl",
)
p.add_argument("--output", default=None, help="output, default: stdout")
p.add_argument("--files", default=False, action="store_true", help="include files (experimental)")
p.add_argument(
"--files",
default=False,
action="store_true",
help="include files (experimental)",
)
pg = p.add_argument_group('template variables')
pg.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml")
pg.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url")
pg.add_argument("--title", default="etherpump", help="title for document or rss feed channel title, default: etherdump")
pg.add_argument("--description", default="", help="rss: channel description, default: empty")
pg.add_argument("--language", default="en-US", help="rss: feed language, default: en-US")
pg.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
pg.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1")
pg.add_argument("--generator", default="https://gitlab.com/activearchives/etherpump", help="generator, default: https://gitlab.com/activearchives/etherdump")
pg.add_argument("--timestamp", default=None, help="timestamp, default: now (e.g. 2015-12-01 12:30:00)")
pg.add_argument(
"--feedurl",
default="feed.xml",
help="rss: to use as feeds own (self) link, default: feed.xml",
)
pg.add_argument(
"--siteurl",
default=None,
help="rss: to use as channel's site link, default: the etherpad url",
)
pg.add_argument(
"--title",
default="etherpump",
help="title for document or rss feed channel title, default: etherdump",
)
pg.add_argument(
"--description",
default="",
help="rss: channel description, default: empty",
)
pg.add_argument(
"--language", default="en-US", help="rss: feed language, default: en-US"
)
pg.add_argument(
"--updatePeriod",
default="daily",
help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily",
)
pg.add_argument(
"--updateFrequency",
default=1,
type=int,
help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1",
)
pg.add_argument(
"--generator",
default="https://gitlab.com/activearchives/etherpump",
help="generator, default: https://gitlab.com/activearchives/etherdump",
)
pg.add_argument(
"--timestamp",
default=None,
help="timestamp, default: now (e.g. 2015-12-01 12:30:00)",
)
pg.add_argument("--next", default=None, help="next link, default: None)")
pg.add_argument("--prev", default=None, help="prev link, default: None")
@ -130,17 +226,12 @@ def main (args):
# Use "base" to strip (longest) extensions
# inputs = group(inputs, base)
def wrappath(p):
    """Build a version record for the local file p.

    The record uses "./" + p as both its url and its path, a fixed code of
    200, and the file extension of p (without the leading dot) as its type.
    """
    relative = "./{0}".format(p)
    _, dotted_ext = os.path.splitext(p)
    return dict(url=relative, path=relative, code=200, type=dotted_ext[1:])
def metaforpaths (paths):
def metaforpaths(paths):
ret = {}
pid = base(paths[0])
ret['pad'] = ret['padid'] = pid
@ -150,7 +241,9 @@ def main (args):
mtime = os.stat(p).st_mtime
if lastedited == None or mtime > lastedited:
lastedited = mtime
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime("%Y-%m-%dT%H:%M:%S")
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime(
"%Y-%m-%dT%H:%M:%S"
)
ret["lastedited_raw"] = mtime
return ret
@ -170,7 +263,7 @@ def main (args):
# else:
# return metaforpaths(paths)
def fixdates (padmeta):
def fixdates(padmeta):
d = dateutil.parser.parse(padmeta["lastedited_iso"])
padmeta["lastedited"] = d
padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000")
@ -181,17 +274,21 @@ def main (args):
pads = list(map(fixdates, pads))
args.pads = list(pads)
def could_have_base(x, y):
    """Check whether x is named after the base y.

    True when x is exactly y, or when x begins with y and continues with a
    "." directly after it; False otherwise.
    """
    if x == y:
        return True
    if not x.startswith(y):
        return False
    remainder = x[len(y):]
    return remainder.startswith(".")
def get_best_pad(x):
    """Find the pad that the file name x should be grouped with.

    The bases in padbases are tried in order; the pads_by_base entry of the
    first one accepted by could_have_base(x, base) is returned. None is
    returned when nothing matches.
    """
    for pb in padbases:
        pad = pads_by_base[pb]
        if not could_have_base(x, pb):
            continue
        return pad
    return None
def has_version(padinfo, path):
    """Return the versions listed in padinfo that point at "./" + path.

    The result is a list (empty when there is no such version); entries
    lacking a 'path' key never match.
    """

    def _same_path(version):
        return 'path' in version and version['path'] == "./" + path

    return list(filter(_same_path, padinfo['versions']))
if args.files:
inputs = args.input
@ -209,25 +306,33 @@ def main (args):
# print ("PADBASES", file=sys.stderr)
# for pb in padbases:
# print (" ", pb, file=sys.stderr)
print ("pairing input files with pads", file=sys.stderr)
print("pairing input files with pads", file=sys.stderr)
for x in inputs:
# pair input with a pad if possible
xbasename = os.path.basename(x)
p = get_best_pad(xbasename)
if p:
if not has_version(p, x):
print ("Grouping file {0} with pad {1}".format(x, p['padid']), file=sys.stderr)
print(
"Grouping file {0} with pad {1}".format(x, p['padid']),
file=sys.stderr,
)
p['versions'].append(wrappath(x))
else:
print ("Skipping existing version {0} ({1})...".format(x, p['padid']), file=sys.stderr)
print(
"Skipping existing version {0} ({1})...".format(
x, p['padid']
),
file=sys.stderr,
)
removelist.append(x)
# Removed Matches files
for x in removelist:
inputs.remove(x)
print ("Remaining files:", file=sys.stderr)
print("Remaining files:", file=sys.stderr)
for x in inputs:
print (x, file=sys.stderr)
print (file=sys.stderr)
print(x, file=sys.stderr)
print(file=sys.stderr)
# Add "fake" pads for remaining files
for x in inputs:
args.pads.append(metaforpaths([x]))
@ -243,7 +348,9 @@ def main (args):
# order items & apply limit
if args.order == "lastedited":
args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=args.reverse)
args.pads.sort(
key=lambda x: x.get("lastedited_iso"), reverse=args.reverse
)
elif args.order == "pad":
args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse)
elif args.order == "padid":
@ -251,17 +358,20 @@ def main (args):
elif args.order == "revisions":
args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse)
elif args.order == "authors":
args.pads.sort(key=lambda x: len(x.get("authors")), reverse=args.reverse)
args.pads.sort(
key=lambda x: len(x.get("authors")), reverse=args.reverse
)
elif args.order == "custom":
# TODO: make this list non-static, but a variable that can be given from the CLI
# TODO: make this list non-static, but a variable that can be given from the CLI
customorder = [
'nooo.relearn.preamble',
'nooo.relearn.activating.the.archive',
'nooo.relearn.call.for.proposals',
'nooo.relearn.call.for.proposals-proposal-footnote',
'nooo.relearn.colophon']
'nooo.relearn.colophon',
]
order = []
for x in customorder:
for pad in args.pads:
@ -272,7 +382,7 @@ def main (args):
raise Exception("That ordering is not implemented!")
if args.limit:
args.pads = args.pads[:args.limit]
args.pads = args.pads[: args.limit]
# add versions_by_type, add in full text
# add link (based on args.link)
@ -289,7 +399,7 @@ def main (args):
if "text" in versions_by_type:
# try:
with open (versions_by_type["text"]["path"]) as f:
with open(versions_by_type["text"]["path"]) as f:
content = f.read()
# print('content:', content)
# [Relearn] Add pandoc command here?
@ -297,7 +407,7 @@ def main (args):
# print('html:', html)
p["text"] = html
# except FileNotFoundError:
# p['text'] = 'ERROR'
# p['text'] = 'ERROR'
# ADD IN LINK TO PAD AS "link"
for v in linkversions:
@ -312,6 +422,6 @@ def main (args):
if args.output:
with open(args.output, "w") as f:
print (template.render(vars(args)), file=f)
print(template.render(vars(args)), file=f)
else:
print (template.render(vars(args)))
print(template.render(vars(args)))

View File

@ -1,17 +1,19 @@
import json
import os
import re
import sys
from argparse import ArgumentParser
import sys, json, re, os
from datetime import datetime
from fnmatch import fnmatch
from time import sleep
from urllib.parse import quote, urlencode
from urllib.request import HTTPError, URLError, urlopen
from xml.etree import ElementTree as ET
from urllib.parse import urlencode, quote
from urllib.request import urlopen, URLError, HTTPError
import html5lib
from etherpump.commands.common import *
from time import sleep
from etherpump.commands.html5tidy import html5tidy
import html5lib
from xml.etree import ElementTree as ET
from fnmatch import fnmatch
# debugging
# import ElementTree as ET
@ -28,43 +30,144 @@ use/prefer public interfaces ? (export functions)
"""
def try_deleting(files):
    """Remove each path in files, ignoring the ones that cannot be removed.

    This is a deliberate best-effort cleanup: an OSError raised by os.remove
    (for instance because the file does not exist) is swallowed and the loop
    moves on to the next path.
    """
    for f in files:
        try:
            os.remove(f)
        except OSError:
            # best effort: a file that is missing or undeletable is skipped
            pass
def main (args):
p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)")
def main(args):
p = ArgumentParser(
"Check for pads that have changed since last sync (according to .meta.json)"
)
p.add_argument("padid", nargs="*", default=[])
p.add_argument("--glob", default=False, help="download pads matching a glob pattern")
p.add_argument(
"--glob", default=False, help="download pads matching a glob pattern"
)
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherpump/settings.json")
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
p.add_argument("--pub", default="p", help="folder to store files for public pads, default: p")
p.add_argument("--group", default="g", help="folder to store files for group pads, default: g")
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
p.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False")
p.add_argument("--text", default=False, action="store_true", help="download text to PADID.txt, default: False")
p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False")
p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.diff.html, default: False")
p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False")
p.add_argument("--folder", default=False, action="store_true", help="dump files in a folder named PADID (meta, text, html, dhtml), default: False")
p.add_argument("--output", default=False, action="store_true", help="output changed padids on stdout")
p.add_argument("--force", default=False, action="store_true", help="reload, even if revisions count matches previous")
p.add_argument("--no-raw-ext", default=False, action="store_true", help="save plain text as padname with no (additional) extension")
p.add_argument("--fix-names", default=False, action="store_true", help="normalize padid's (no spaces, special control chars) for use in file names")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherpump/settings.json",
)
p.add_argument(
"--zerorevs",
default=False,
action="store_true",
help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)",
)
p.add_argument(
"--pub",
default="p",
help="folder to store files for public pads, default: p",
)
p.add_argument(
"--group",
default="g",
help="folder to store files for group pads, default: g",
)
p.add_argument(
"--skip",
default=None,
type=int,
help="skip this many items, default: None",
)
p.add_argument(
"--meta",
default=False,
action="store_true",
help="download meta to PADID.meta.json, default: False",
)
p.add_argument(
"--text",
default=False,
action="store_true",
help="download text to PADID.txt, default: False",
)
p.add_argument(
"--html",
default=False,
action="store_true",
help="download html to PADID.html, default: False",
)
p.add_argument(
"--dhtml",
default=False,
action="store_true",
help="download dhtml to PADID.diff.html, default: False",
)
p.add_argument(
"--all",
default=False,
action="store_true",
help="download all files (meta, text, html, dhtml), default: False",
)
p.add_argument(
"--folder",
default=False,
action="store_true",
help="dump files in a folder named PADID (meta, text, html, dhtml), default: False",
)
p.add_argument(
"--output",
default=False,
action="store_true",
help="output changed padids on stdout",
)
p.add_argument(
"--force",
default=False,
action="store_true",
help="reload, even if revisions count matches previous",
)
p.add_argument(
"--no-raw-ext",
default=False,
action="store_true",
help="save plain text as padname with no (additional) extension",
)
p.add_argument(
"--fix-names",
default=False,
action="store_true",
help="normalize padid's (no spaces, special control chars) for use in file names",
)
p.add_argument("--filter-ext", default=None, help="filter pads by extension")
p.add_argument(
"--filter-ext", default=None, help="filter pads by extension"
)
p.add_argument("--css", default="/styles.css", help="add css url to output pages, default: /styles.css")
p.add_argument("--script", default="/versions.js", help="add script url to output pages, default: /versions.js")
p.add_argument(
"--css",
default="/styles.css",
help="add css url to output pages, default: /styles.css",
)
p.add_argument(
"--script",
default="/versions.js",
help="add script url to output pages, default: /versions.js",
)
p.add_argument("--nopublish", default="__NOPUBLISH__", help="no publish magic word, default: __NOPUBLISH__")
p.add_argument("--publish", default="__PUBLISH__", help="the publish magic word, default: __PUBLISH__")
p.add_argument("--publish-opt-in", default=False, action="store_true", help="ensure `--publish` is honoured instead of `--nopublish`")
p.add_argument(
"--nopublish",
default="__NOPUBLISH__",
help="no publish magic word, default: __NOPUBLISH__",
)
p.add_argument(
"--publish",
default="__PUBLISH__",
help="the publish magic word, default: __PUBLISH__",
)
p.add_argument(
"--publish-opt-in",
default=False,
action="store_true",
help="ensure `--publish` is honoured instead of `--nopublish`",
)
args = p.parse_args(args)
@ -79,16 +182,20 @@ def main (args):
if args.padid:
padids = args.padid
elif args.glob:
padids = getjson(info['localapiurl']+'listAllPads?'+urlencode(data))['data']['padIDs']
padids = getjson(
info['localapiurl'] + 'listAllPads?' + urlencode(data)
)['data']['padIDs']
padids = [x for x in padids if fnmatch(x, args.glob)]
else:
padids = getjson(info['localapiurl']+'listAllPads?'+urlencode(data))['data']['padIDs']
padids = getjson(
info['localapiurl'] + 'listAllPads?' + urlencode(data)
)['data']['padIDs']
padids.sort()
numpads = len(padids)
# maxmsglen = 0
count = 0
for i, padid in enumerate(padids):
if args.skip != None and i<args.skip:
if args.skip != None and i < args.skip:
continue
progressbar(i, numpads, padid)
@ -110,47 +217,73 @@ def main (args):
if os.path.exists(metapath):
with open(metapath) as f:
meta.update(json.load(f))
revisions = getjson(info['localapiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
revisions = getjson(
info['localapiurl']
+ 'getRevisionsCount?'
+ urlencode(data)
)['data']['revisions']
if meta['revisions'] == revisions and not args.force:
skip=True
skip = True
break
meta['padid'] = padid # .encode("utf-8")
meta['padid'] = padid # .encode("utf-8")
versions = meta["versions"] = []
versions.append({
"url": padurlbase + quote(padid),
"type": "pad",
"code": 200
})
versions.append(
{
"url": padurlbase + quote(padid),
"type": "pad",
"code": 200,
}
)
if revisions == None:
meta['revisions'] = getjson(info['localapiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
meta['revisions'] = getjson(
info['localapiurl']
+ 'getRevisionsCount?'
+ urlencode(data)
)['data']['revisions']
else:
meta['revisions' ] = revisions
meta['revisions'] = revisions
if (meta['revisions'] == 0) and (not args.zerorevs):
# print("Skipping zero revs", file=sys.stderr)
skip=True
skip = True
break
# todo: load more metadata!
meta['group'], meta['pad'] = splitpadname(padid)
meta['pathbase'] = p
meta['lastedited_raw'] = int(getjson(info['localapiurl']+'getLastEdited?'+urlencode(data))['data']['lastEdited'])
meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat()
meta['author_ids'] = getjson(info['localapiurl']+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
meta['lastedited_raw'] = int(
getjson(
info['localapiurl'] + 'getLastEdited?' + urlencode(data)
)['data']['lastEdited']
)
meta['lastedited_iso'] = datetime.fromtimestamp(
int(meta['lastedited_raw']) / 1000
).isoformat()
meta['author_ids'] = getjson(
info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
)['data']['authorIDs']
break
except HTTPError as e:
tries += 1
if tries > 3:
print ("Too many failures ({0}), skipping".format(padid), file=sys.stderr)
skip=True
print(
"Too many failures ({0}), skipping".format(padid),
file=sys.stderr,
)
skip = True
break
else:
sleep(3)
except TypeError as e:
print ("Type Error loading pad {0} (phantom pad?), skipping".format(padid), file=sys.stderr)
skip=True
print(
"Type Error loading pad {0} (phantom pad?), skipping".format(
padid
),
file=sys.stderr,
)
skip = True
break
if skip:
@ -159,7 +292,7 @@ def main (args):
count += 1
if args.output:
print (padid)
print(padid)
if args.all or (args.meta or args.text or args.html or args.dhtml):
try:
@ -168,7 +301,7 @@ def main (args):
pass
if args.all or args.text:
text = getjson(info['localapiurl']+'getText?'+urlencode(data))
text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))
ver = {"type": "text"}
versions.append(ver)
ver["code"] = text["_code"]
@ -180,17 +313,31 @@ def main (args):
##########################################
if args.nopublish and args.nopublish in text:
# NEED TO PURGE ANY EXISTING DOCS
try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json"))
try_deleting(
(
p + raw_ext,
p + ".raw.html",
p + ".diff.html",
p + ".meta.json",
)
)
continue
##########################################
## ENFORCE __PUBLISH__ MAGIC WORD
##########################################
if args.publish_opt_in and args.publish not in text:
try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json"))
try_deleting(
(
p + raw_ext,
p + ".raw.html",
p + ".diff.html",
p + ".meta.json",
)
)
continue
ver["path"] = p+raw_ext
ver["path"] = p + raw_ext
ver["url"] = quote(ver["path"])
with open(ver["path"], "w") as f:
f.write(text)
@ -199,38 +346,86 @@ def main (args):
links = []
if args.css:
links.append({"href":args.css, "rel":"stylesheet"})
links.append({"href": args.css, "rel": "stylesheet"})
# todo, make this process reflect which files actually were made
versionbaseurl = quote(padid)
links.append({"href":versions[0]["url"], "rel":"alternate", "type":"text/html", "title":"Etherpad"})
links.append(
{
"href": versions[0]["url"],
"rel": "alternate",
"type": "text/html",
"title": "Etherpad",
}
)
if args.all or args.text:
links.append({"href":versionbaseurl+raw_ext, "rel":"alternate", "type":"text/plain", "title":"Plain text"})
links.append(
{
"href": versionbaseurl + raw_ext,
"rel": "alternate",
"type": "text/plain",
"title": "Plain text",
}
)
if args.all or args.html:
links.append({"href":versionbaseurl+".raw.html", "rel":"alternate", "type":"text/html", "title":"HTML"})
links.append(
{
"href": versionbaseurl + ".raw.html",
"rel": "alternate",
"type": "text/html",
"title": "HTML",
}
)
if args.all or args.dhtml:
links.append({"href":versionbaseurl+".diff.html", "rel":"alternate", "type":"text/html", "title":"HTML with author colors"})
links.append(
{
"href": versionbaseurl + ".diff.html",
"rel": "alternate",
"type": "text/html",
"title": "HTML with author colors",
}
)
if args.all or args.meta:
links.append({"href":versionbaseurl+".meta.json", "rel":"alternate", "type":"application/json", "title":"Meta data"})
links.append(
{
"href": versionbaseurl + ".meta.json",
"rel": "alternate",
"type": "application/json",
"title": "Meta data",
}
)
# links.append({"href":"/", "rel":"search", "type":"text/html", "title":"Index"})
if args.all or args.dhtml:
data['startRev'] = "0"
html = getjson(info['localapiurl']+'createDiffHTML?'+urlencode(data))
html = getjson(
info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
)
ver = {"type": "diffhtml"}
versions.append(ver)
ver["code"] = html["_code"]
if html["_code"] == 200:
try:
html = html['data']['html']
ver["path"] = p+".diff.html"
ver["path"] = p + ".diff.html"
ver["url"] = quote(ver["path"])
# doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
doc = html5lib.parse(
html, treebuilder="etree", namespaceHTMLElements=False
)
html5tidy(
doc,
indent=True,
title=padid,
scripts=args.script,
links=links,
)
with open(ver["path"], "w") as f:
# f.write(html.encode("utf-8"))
print(ET.tostring(doc, method="html", encoding="unicode"), file=f)
print(
ET.tostring(doc, method="html", encoding="unicode"),
file=f,
)
except TypeError:
# Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file!
ver["message"] = html["message"]
@ -239,19 +434,30 @@ def main (args):
# Process text, html, dhtml, all options
if args.all or args.html:
html = getjson(info['localapiurl']+'getHTML?'+urlencode(data))
html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))
ver = {"type": "html"}
versions.append(ver)
ver["code"] = html["_code"]
if html["_code"] == 200:
html = html['data']['html']
ver["path"] = p+".raw.html"
ver["path"] = p + ".raw.html"
ver["url"] = quote(ver["path"])
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
doc = html5lib.parse(
html, treebuilder="etree", namespaceHTMLElements=False
)
html5tidy(
doc,
indent=True,
title=padid,
scripts=args.script,
links=links,
)
with open(ver["path"], "w") as f:
# f.write(html.encode("utf-8"))
print (ET.tostring(doc, method="html", encoding="unicode"), file=f)
print(
ET.tostring(doc, method="html", encoding="unicode"),
file=f,
)
# output meta
if args.all or args.meta:

View File

@ -1,14 +1,18 @@
from argparse import ArgumentParser
import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args):
p = ArgumentParser("call getRevisionsCount for the given padid")
p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
args = p.parse_args(args)
@ -18,9 +22,9 @@ def main(args):
data = {}
data['apikey'] = info['apikey']
data['padID'] = args.padid.encode("utf-8")
requesturl = apiurl+'getRevisionsCount?'+urlencode(data)
requesturl = apiurl + 'getRevisionsCount?' + urlencode(data)
if args.showurl:
print (requesturl)
print(requesturl)
else:
results = json.load(urlopen(requesturl))['data']['revisions']
print (results)
print(results)

View File

@ -1,39 +1,60 @@
import json
import sys
from argparse import ArgumentParser
import json, sys
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
import requests
LIMIT_BYTES = 100 * 1000
LIMIT_BYTES = 100*1000
def main(args):
p = ArgumentParser("calls the setHTML API function for the given padid")
p.add_argument("padid", help="the padid")
p.add_argument("--html", default=None, help="html, default: read from stdin")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--html", default=None, help="html, default: read from stdin"
)
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
# p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument("--create", default=False, action="store_true", help="flag to create pad if necessary")
p.add_argument("--limit", default=False, action="store_true", help="limit text to 100k (etherpad limit)")
p.add_argument(
"--create",
default=False,
action="store_true",
help="flag to create pad if necessary",
)
p.add_argument(
"--limit",
default=False,
action="store_true",
help="limit text to 100k (etherpad limit)",
)
args = p.parse_args(args)
with open(args.padinfo) as f:
info = json.load(f)
apiurl = info.get("apiurl")
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
# data = {}
# data['apikey'] = info['apikey']
# data['padID'] = args.padid # is utf-8 encoded
# data = {}
# data['apikey'] = info['apikey']
# data['padID'] = args.padid # is utf-8 encoded
createPad = False
if args.create:
# check if it's in fact necessary
requesturl = apiurl+'getRevisionsCount?'+urlencode({'apikey': info['apikey'], 'padID': args.padid})
requesturl = (
apiurl
+ 'getRevisionsCount?'
+ urlencode({'apikey': info['apikey'], 'padID': args.padid})
)
results = json.load(urlopen(requesturl))
print (json.dumps(results, indent=2), file=sys.stderr)
print(json.dumps(results, indent=2), file=sys.stderr)
if results['code'] != 0:
createPad = True
@ -47,21 +68,27 @@ def main(args):
params['padID'] = args.padid
if createPad:
requesturl = apiurl+'createPad'
requesturl = apiurl + 'createPad'
if args.showurl:
print (requesturl)
results = requests.post(requesturl, params=params, data={'text': ''}) # json.load(urlopen(requesturl))
print(requesturl)
results = requests.post(
requesturl, params=params, data={'text': ''}
) # json.load(urlopen(requesturl))
results = json.loads(results.text)
print (json.dumps(results, indent=2))
print(json.dumps(results, indent=2))
if len(html) > LIMIT_BYTES and args.limit:
print ("limiting", len(text), LIMIT_BYTES, file=sys.stderr)
print("limiting", len(text), LIMIT_BYTES, file=sys.stderr)
html = html[:LIMIT_BYTES]
requesturl = apiurl+'setHTML'
requesturl = apiurl + 'setHTML'
if args.showurl:
print (requesturl)
print(requesturl)
# params['html'] = html
results = requests.post(requesturl, params={'apikey': info['apikey']}, data={'apikey': info['apikey'], 'padID': args.padid, 'html': html}) # json.load(urlopen(requesturl))
results = requests.post(
requesturl,
params={'apikey': info['apikey']},
data={'apikey': info['apikey'], 'padID': args.padid, 'html': html},
) # json.load(urlopen(requesturl))
results = json.loads(results.text)
print (json.dumps(results, indent=2))
print(json.dumps(results, indent=2))

View File

@ -1,24 +1,39 @@
import json
import sys
from argparse import ArgumentParser
import json, sys
from urllib.parse import urlencode, quote
from urllib.request import urlopen, URLError, HTTPError
from urllib.parse import quote, urlencode
from urllib.request import HTTPError, URLError, urlopen
import requests
LIMIT_BYTES = 100 * 1000
LIMIT_BYTES = 100*1000
def main(args):
p = ArgumentParser("calls the getText API function for the given padid")
p.add_argument("padid", help="the padid")
p.add_argument("--text", default=None, help="text, default: read from stdin")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--text", default=None, help="text, default: read from stdin"
)
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
# p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument("--create", default=False, action="store_true", help="flag to create pad if necessary")
p.add_argument("--limit", default=False, action="store_true", help="limit text to 100k (etherpad limit)")
p.add_argument(
"--create",
default=False,
action="store_true",
help="flag to create pad if necessary",
)
p.add_argument(
"--limit",
default=False,
action="store_true",
help="limit text to 100k (etherpad limit)",
)
args = p.parse_args(args)
with open(args.padinfo) as f:
@ -27,11 +42,11 @@ def main(args):
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
data = {}
data['apikey'] = info['apikey']
data['padID'] = args.padid # is utf-8 encoded
data['padID'] = args.padid # is utf-8 encoded
createPad = False
if args.create:
requesturl = apiurl+'getRevisionsCount?'+urlencode(data)
requesturl = apiurl + 'getRevisionsCount?' + urlencode(data)
results = json.load(urlopen(requesturl))
# print (json.dumps(results, indent=2))
if results['code'] != 0:
@ -43,20 +58,26 @@ def main(args):
text = sys.stdin.read()
if len(text) > LIMIT_BYTES and args.limit:
print ("limiting", len(text), LIMIT_BYTES)
print("limiting", len(text), LIMIT_BYTES)
text = text[:LIMIT_BYTES]
data['text'] = text
if createPad:
requesturl = apiurl+'createPad'
requesturl = apiurl + 'createPad'
else:
requesturl = apiurl+'setText'
requesturl = apiurl + 'setText'
if args.showurl:
print (requesturl)
results = requests.post(requesturl, params=data) # json.load(urlopen(requesturl))
print(requesturl)
results = requests.post(
requesturl, params=data
) # json.load(urlopen(requesturl))
results = json.loads(results.text)
if results['code'] != 0:
print ("setText: ERROR ({0}) on pad {1}: {2}".format(results['code'], args.padid, results['message']))
print(
"setText: ERROR ({0}) on pad {1}: {2}".format(
results['code'], args.padid, results['message']
)
)
# json.dumps(results, indent=2)

View File

@ -1,17 +1,25 @@
import json
import re
import sys
from argparse import ArgumentParser
import json, sys, re
from .common import *
"""
Extract and output selected fields of metadata
"""
def main (args):
p = ArgumentParser("extract & display meta data from a specific .meta.json file, or for a given padid (nb: it still looks for a .meta.json file)")
def main(args):
p = ArgumentParser(
"extract & display meta data from a specific .meta.json file, or for a given padid (nb: it still looks for a .meta.json file)"
)
p.add_argument("--path", default=None, help="read from a meta.json file")
p.add_argument("--padid", default=None, help="read meta for this padid")
p.add_argument("--format", default="{padid}", help="format str, default: {padid}")
p.add_argument(
"--format", default="{padid}", help="format str, default: {padid}"
)
args = p.parse_args(args)
path = args.path
@ -19,7 +27,7 @@ def main (args):
path = padpath(args.padid) + ".meta.json"
if not path:
print ("Must specify either --path or --padid")
print("Must specify either --path or --padid")
sys.exit(-1)
with open(path) as f:
@ -27,5 +35,4 @@ def main (args):
formatstr = args.format.decode("utf-8")
formatstr = re.sub(r"{(\w+)}", r"{0[\1]}", formatstr)
print (formatstr.format(meta).encode("utf-8"))
print(formatstr.format(meta).encode("utf-8"))

View File

@ -1,13 +1,17 @@
import json
import os
import re
import sys
from argparse import ArgumentParser
import sys, json, re, os
from datetime import datetime
from math import ceil, floor
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
from math import ceil, floor
from .common import *
"""
status (meta):
Update meta data files for those that have changed.
@ -22,16 +26,18 @@ complicates the "syncing" idea....
"""
class PadItemException (Exception):
class PadItemException(Exception):
pass
class PadItem ():
def __init__ (self, padid=None, path=None, padexists=False):
class PadItem:
def __init__(self, padid=None, path=None, padexists=False):
self.padexists = padexists
if padid and path:
raise PadItemException("only give padid or path")
if not (padid or path):
raise PadItemException("either padid or path must be specified")
raise PadItemException("either padid or path must be specified")
if padid:
self.padid = padid
self.path = padpath(padid, group_path="g")
@ -40,7 +46,7 @@ class PadItem ():
self.padid = padpath2id(path)
@property
def status (self):
def status(self):
if self.fileexists:
if self.padexists:
return "S"
@ -52,26 +58,77 @@ class PadItem ():
return "?"
@property
def fileexists (self):
def fileexists(self):
return os.path.exists(self.path)
def ignore_p (path, settings=None):
def ignore_p(path, settings=None):
if path.startswith("."):
return True
def main (args):
p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)")
def main(args):
p = ArgumentParser(
"Check for pads that have changed since last sync (according to .meta.json)"
)
# p.add_argument("padid", nargs="*", default=[])
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
p.add_argument("--pub", default=".", help="folder to store files for public pads, default: pub")
p.add_argument("--group", default="g", help="folder to store files for group pads, default: g")
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
p.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False")
p.add_argument("--text", default=False, action="store_true", help="download text to PADID.txt, default: False")
p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False")
p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.dhtml, default: False")
p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument(
"--zerorevs",
default=False,
action="store_true",
help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)",
)
p.add_argument(
"--pub",
default=".",
help="folder to store files for public pads, default: pub",
)
p.add_argument(
"--group",
default="g",
help="folder to store files for group pads, default: g",
)
p.add_argument(
"--skip",
default=None,
type=int,
help="skip this many items, default: None",
)
p.add_argument(
"--meta",
default=False,
action="store_true",
help="download meta to PADID.meta.json, default: False",
)
p.add_argument(
"--text",
default=False,
action="store_true",
help="download text to PADID.txt, default: False",
)
p.add_argument(
"--html",
default=False,
action="store_true",
help="download html to PADID.html, default: False",
)
p.add_argument(
"--dhtml",
default=False,
action="store_true",
help="download dhtml to PADID.dhtml, default: False",
)
p.add_argument(
"--all",
default=False,
action="store_true",
help="download all files (meta, text, html, dhtml), default: False",
)
args = p.parse_args(args)
info = loadpadinfo(args.padinfo)
@ -81,7 +138,9 @@ def main (args):
padsbypath = {}
# listAllPads
padids = getjson(info['apiurl']+'listAllPads?'+urlencode(data))['data']['padIDs']
padids = getjson(info['apiurl'] + 'listAllPads?' + urlencode(data))['data'][
'padIDs'
]
padids.sort()
for padid in padids:
pad = PadItem(padid=padid, padexists=True)
@ -104,9 +163,9 @@ def main (args):
if p.status != curstat:
curstat = p.status
if curstat == "F":
print ("New/changed files")
print("New/changed files")
elif curstat == "P":
print ("New/changed pads")
print("New/changed pads")
elif curstat == ".":
print ("Up to date")
print (" ", p.status, p.padid)
print("Up to date")
print(" ", p.status, p.padid)

12
pyproject.toml Normal file
View File

@ -0,0 +1,12 @@
[build-system]
requires = [
"setuptools>=41.0.0",
"setuptools-scm",
"wheel",
]
build-backend = "setuptools.build_meta"
[tool.black]
line-length = 80
target-version = ['py35', 'py36', 'py37']
skip-string-normalization = true

9
setup.cfg Normal file
View File

@ -0,0 +1,9 @@
[flake8]
max-line-length = 80
[isort]
known_first_party = etherpump
line_length = 80
multi_line_output = 3
include_trailing_comma = True
skip = .venv

View File

@ -1,8 +1,9 @@
#!/usr/bin/env python3
from etherpump import VERSION
from setuptools import find_packages, setup
from etherpump import VERSION
with open('README.md', 'r') as handle:
long_description = handle.read()