Add maintenance tools and run them
This commit is contained in:
parent
159165d2d5
commit
8f18594833
9
Makefile
9
Makefile
@ -1,4 +1,13 @@
|
||||
SOURCE_DIRS := bin/ etherpump/
|
||||
|
||||
publish:
|
||||
@rm -rf dist
|
||||
@python setup.py bdist_wheel
|
||||
@twine upload dist/*
|
||||
|
||||
format:
|
||||
@black $(SOURCE_DIRS)
|
||||
@isort -rc $(SOURCE_DIRS)
|
||||
|
||||
lint:
|
||||
@flake8 $(SOURCE_DIRS)
|
||||
|
17
README.md
17
README.md
@ -126,6 +126,23 @@ Publishing
|
||||
|
||||
You should have a [PyPI](https://pypi.org/) account and be added as an owner/maintainer of the [etherpump package](https://pypi.org/project/etherpump/).
|
||||
|
||||
Maintenance utilities
|
||||
---------------------
|
||||
|
||||
Tools to help things stay tidy over time.
|
||||
|
||||
```bash
|
||||
$ pip install flake8 isort black
|
||||
$ make format
|
||||
$ make lint
|
||||
```
|
||||
|
||||
Please see the following links for further reading:
|
||||
|
||||
* https://flake8.pycqa.org
|
||||
* https://isort.readthedocs.io
|
||||
* https://black.readthedocs.io
|
||||
|
||||
License
|
||||
=======
|
||||
|
||||
|
@ -1,8 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from etherpump import VERSION
|
||||
import sys
|
||||
|
||||
from etherpump import VERSION
|
||||
|
||||
usage = """Usage:
|
||||
etherpump CMD
|
||||
|
||||
@ -43,7 +44,9 @@ except IndexError:
|
||||
sys.exit(0)
|
||||
try:
|
||||
# http://stackoverflow.com/questions/301134/dynamic-module-import-in-python
|
||||
cmdmod = __import__("etherpump.commands.%s" % cmd, fromlist=["etherdump.commands"])
|
||||
cmdmod = __import__(
|
||||
"etherpump.commands.%s" % cmd, fromlist=["etherdump.commands"]
|
||||
)
|
||||
cmdmod.main(args)
|
||||
except ImportError as e:
|
||||
print("Error performing command '{0}'\n(python said: {1})\n".format(cmd, e))
|
||||
|
@ -1,4 +1,4 @@
|
||||
import os
|
||||
|
||||
DATAPATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data")
|
||||
VERSION = '0.0.2'
|
||||
VERSION = '0.0.2'
|
||||
|
@ -1,8 +1,10 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
|
||||
import json
|
||||
import os
|
||||
from argparse import ArgumentParser
|
||||
import json, os
|
||||
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("")
|
||||
@ -18,6 +20,6 @@ def main(args):
|
||||
ret.append(meta)
|
||||
|
||||
if args.indent:
|
||||
print (json.dumps(ret, indent=args.indent))
|
||||
print(json.dumps(ret, indent=args.indent))
|
||||
else:
|
||||
print (json.dumps(ret))
|
||||
print(json.dumps(ret))
|
||||
|
@ -1,24 +1,35 @@
|
||||
|
||||
import re, os, json, sys
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from html.entities import name2codepoint
|
||||
from math import ceil, floor
|
||||
from time import sleep
|
||||
|
||||
from urllib.parse import urlparse, urlunparse, urlencode, quote_plus, unquote_plus
|
||||
from urllib.request import urlopen, URLError, HTTPError
|
||||
from html.entities import name2codepoint
|
||||
from urllib.parse import (
|
||||
quote_plus,
|
||||
unquote_plus,
|
||||
urlencode,
|
||||
urlparse,
|
||||
urlunparse,
|
||||
)
|
||||
from urllib.request import HTTPError, URLError, urlopen
|
||||
|
||||
groupnamepat = re.compile(r"^g\.(\w+)\$")
|
||||
def splitpadname (padid):
|
||||
|
||||
|
||||
def splitpadname(padid):
|
||||
m = groupnamepat.match(padid)
|
||||
if m:
|
||||
return(m.group(1), padid[m.end():])
|
||||
return (m.group(1), padid[m.end() :])
|
||||
else:
|
||||
return ("", padid)
|
||||
|
||||
def padurl (padid, ):
|
||||
|
||||
def padurl(padid,):
|
||||
return padid
|
||||
|
||||
def padpath (padid, pub_path="", group_path="", normalize=False):
|
||||
|
||||
def padpath(padid, pub_path="", group_path="", normalize=False):
|
||||
g, p = splitpadname(padid)
|
||||
p = quote_plus(p)
|
||||
if normalize:
|
||||
@ -32,7 +43,8 @@ def padpath (padid, pub_path="", group_path="", normalize=False):
|
||||
else:
|
||||
return os.path.join(pub_path, p)
|
||||
|
||||
def padpath2id (path):
|
||||
|
||||
def padpath2id(path):
|
||||
if type(path) == str:
|
||||
path = path.encode("utf-8")
|
||||
dd, p = os.path.split(path)
|
||||
@ -43,7 +55,8 @@ def padpath2id (path):
|
||||
else:
|
||||
return p.decode("utf-8")
|
||||
|
||||
def getjson (url, max_retry=3, retry_sleep_time=3):
|
||||
|
||||
def getjson(url, max_retry=3, retry_sleep_time=3):
|
||||
ret = {}
|
||||
ret["_retries"] = 0
|
||||
while ret["_retries"] <= max_retry:
|
||||
@ -61,13 +74,14 @@ def getjson (url, max_retry=3, retry_sleep_time=3):
|
||||
except ValueError as e:
|
||||
url = "http://localhost" + url
|
||||
except HTTPError as e:
|
||||
print ("HTTPError {0}".format(e), file=sys.stderr)
|
||||
print("HTTPError {0}".format(e), file=sys.stderr)
|
||||
ret["_code"] = e.code
|
||||
ret["_retries"]+=1
|
||||
ret["_retries"] += 1
|
||||
if retry_sleep_time:
|
||||
sleep(retry_sleep_time)
|
||||
return ret
|
||||
|
||||
|
||||
def loadpadinfo(p):
|
||||
with open(p) as f:
|
||||
info = json.load(f)
|
||||
@ -75,17 +89,17 @@ def loadpadinfo(p):
|
||||
info['localapiurl'] = info.get('apiurl')
|
||||
return info
|
||||
|
||||
def progressbar (i, num, label="", file=sys.stderr):
|
||||
|
||||
def progressbar(i, num, label="", file=sys.stderr):
|
||||
p = float(i) / num
|
||||
percentage = int(floor(p*100))
|
||||
bars = int(ceil(p*20))
|
||||
bar = ("*"*bars) + ("-"*(20-bars))
|
||||
msg = "\r{0} {1}/{2} {3}... ".format(bar, (i+1), num, label)
|
||||
percentage = int(floor(p * 100))
|
||||
bars = int(ceil(p * 20))
|
||||
bar = ("*" * bars) + ("-" * (20 - bars))
|
||||
msg = "\r{0} {1}/{2} {3}... ".format(bar, (i + 1), num, label)
|
||||
sys.stderr.write(msg)
|
||||
sys.stderr.flush()
|
||||
|
||||
|
||||
|
||||
# Python developer Fredrik Lundh (author of elementtree, among other things) has such a function on his website, which works with decimal, hex and named entities:
|
||||
##
|
||||
# Removes HTML or XML character references and entities from a text string.
|
||||
@ -110,5 +124,6 @@ def unescape(text):
|
||||
text = chr(name2codepoint[text[1:-1]])
|
||||
except KeyError:
|
||||
pass
|
||||
return text # leave as is
|
||||
return text # leave as is
|
||||
|
||||
return re.sub("&#?\w+;", fixup, text)
|
||||
|
@ -1,18 +1,29 @@
|
||||
|
||||
from argparse import ArgumentParser
|
||||
import json
|
||||
from argparse import ArgumentParser
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import urlopen
|
||||
from urllib.error import HTTPError, URLError
|
||||
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("calls the createDiffHTML API function for the given padid")
|
||||
p = ArgumentParser(
|
||||
"calls the createDiffHTML API function for the given padid"
|
||||
)
|
||||
p.add_argument("padid", help="the padid")
|
||||
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
|
||||
p.add_argument(
|
||||
"--padinfo",
|
||||
default=".etherpump/settings.json",
|
||||
help="settings, default: .etherdump/settings.json",
|
||||
)
|
||||
p.add_argument("--showurl", default=False, action="store_true")
|
||||
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
|
||||
p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
|
||||
p.add_argument(
|
||||
"--format",
|
||||
default="text",
|
||||
help="output format, can be: text, json; default: text",
|
||||
)
|
||||
p.add_argument(
|
||||
"--rev", type=int, default=None, help="revision, default: latest"
|
||||
)
|
||||
args = p.parse_args(args)
|
||||
|
||||
with open(args.padinfo) as f:
|
||||
@ -25,15 +36,15 @@ def main(args):
|
||||
data['startRev'] = "0"
|
||||
if args.rev != None:
|
||||
data['rev'] = args.rev
|
||||
requesturl = apiurl+'createDiffHTML?'+urlencode(data)
|
||||
requesturl = apiurl + 'createDiffHTML?' + urlencode(data)
|
||||
if args.showurl:
|
||||
print (requesturl)
|
||||
print(requesturl)
|
||||
else:
|
||||
try:
|
||||
results = json.load(urlopen(requesturl))['data']
|
||||
if args.format == "json":
|
||||
print (json.dumps(results))
|
||||
print(json.dumps(results))
|
||||
else:
|
||||
print (results['html'].encode("utf-8"))
|
||||
print(results['html'].encode("utf-8"))
|
||||
except HTTPError as e:
|
||||
pass
|
||||
pass
|
||||
|
@ -1,17 +1,24 @@
|
||||
|
||||
from argparse import ArgumentParser
|
||||
import json
|
||||
from argparse import ArgumentParser
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import urlopen
|
||||
from urllib.error import HTTPError, URLError
|
||||
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("calls the getText API function for the given padid")
|
||||
p.add_argument("padid", help="the padid")
|
||||
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
|
||||
p.add_argument(
|
||||
"--padinfo",
|
||||
default=".etherpump/settings.json",
|
||||
help="settings, default: .etherdump/settings.json",
|
||||
)
|
||||
p.add_argument("--showurl", default=False, action="store_true")
|
||||
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
|
||||
p.add_argument(
|
||||
"--format",
|
||||
default="text",
|
||||
help="output format, can be: text, json; default: text",
|
||||
)
|
||||
args = p.parse_args(args)
|
||||
|
||||
with open(args.padinfo) as f:
|
||||
@ -20,14 +27,14 @@ def main(args):
|
||||
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
|
||||
data = {}
|
||||
data['apikey'] = info['apikey']
|
||||
data['padID'] = args.padid # is utf-8 encoded
|
||||
requesturl = apiurl+'deletePad?'+urlencode(data)
|
||||
data['padID'] = args.padid # is utf-8 encoded
|
||||
requesturl = apiurl + 'deletePad?' + urlencode(data)
|
||||
if args.showurl:
|
||||
print (requesturl)
|
||||
print(requesturl)
|
||||
else:
|
||||
results = json.load(urlopen(requesturl))
|
||||
if args.format == "json":
|
||||
print (json.dumps(results))
|
||||
print(json.dumps(results))
|
||||
else:
|
||||
if results['data']:
|
||||
print (results['data']['text'].encode("utf-8"))
|
||||
print(results['data']['text'].encode("utf-8"))
|
||||
|
@ -1,12 +1,13 @@
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
import sys, json, re
|
||||
from csv import writer
|
||||
from datetime import datetime
|
||||
from math import ceil, floor
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import urlopen
|
||||
from urllib.error import HTTPError, URLError
|
||||
from csv import writer
|
||||
from math import ceil, floor
|
||||
|
||||
"""
|
||||
Dumps a CSV of all pads with columns
|
||||
@ -23,16 +24,27 @@ groupnamepat = re.compile(r"^g\.(\w+)\$")
|
||||
|
||||
out = writer(sys.stdout)
|
||||
|
||||
def jsonload (url):
|
||||
|
||||
def jsonload(url):
|
||||
f = urlopen(url)
|
||||
data = f.read()
|
||||
f.close()
|
||||
return json.loads(data)
|
||||
|
||||
def main (args):
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("outputs a CSV of information all all pads")
|
||||
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
|
||||
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False")
|
||||
p.add_argument(
|
||||
"--padinfo",
|
||||
default=".etherpump/settings.json",
|
||||
help="settings, default: .etherdump/settings.json",
|
||||
)
|
||||
p.add_argument(
|
||||
"--zerorevs",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="include pads with zero revisions, default: False",
|
||||
)
|
||||
args = p.parse_args(args)
|
||||
|
||||
with open(args.padinfo) as f:
|
||||
@ -40,7 +52,7 @@ def main (args):
|
||||
apiurl = info.get("apiurl")
|
||||
data = {}
|
||||
data['apikey'] = info['apikey']
|
||||
requesturl = apiurl+'listAllPads?'+urlencode(data)
|
||||
requesturl = apiurl + 'listAllPads?' + urlencode(data)
|
||||
|
||||
padids = jsonload(requesturl)['data']['padIDs']
|
||||
padids.sort()
|
||||
@ -49,36 +61,50 @@ def main (args):
|
||||
count = 0
|
||||
out.writerow(("padid", "groupid", "lastedited", "revisions", "author_ids"))
|
||||
for i, padid in enumerate(padids):
|
||||
p = (float(i) / numpads)
|
||||
percentage = int(floor(p*100))
|
||||
bars = int(ceil(p*20))
|
||||
bar = ("*"*bars) + ("-"*(20-bars))
|
||||
msg = "\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid)
|
||||
p = float(i) / numpads
|
||||
percentage = int(floor(p * 100))
|
||||
bars = int(ceil(p * 20))
|
||||
bar = ("*" * bars) + ("-" * (20 - bars))
|
||||
msg = "\r{0} {1}/{2} {3}... ".format(bar, (i + 1), numpads, padid)
|
||||
if len(msg) > maxmsglen:
|
||||
maxmsglen = len(msg)
|
||||
sys.stderr.write("\r{0}".format(" "*maxmsglen))
|
||||
sys.stderr.write("\r{0}".format(" " * maxmsglen))
|
||||
sys.stderr.write(msg.encode("utf-8"))
|
||||
sys.stderr.flush()
|
||||
m = groupnamepat.match(padid)
|
||||
if m:
|
||||
groupname = m.group(1)
|
||||
padidnogroup = padid[m.end():]
|
||||
padidnogroup = padid[m.end() :]
|
||||
else:
|
||||
groupname = ""
|
||||
padidnogroup = padid
|
||||
|
||||
data['padID'] = padid.encode("utf-8")
|
||||
revisions = jsonload(apiurl+'getRevisionsCount?'+urlencode(data))['data']['revisions']
|
||||
revisions = jsonload(apiurl + 'getRevisionsCount?' + urlencode(data))[
|
||||
'data'
|
||||
]['revisions']
|
||||
if (revisions == 0) and not args.zerorevs:
|
||||
continue
|
||||
|
||||
|
||||
lastedited_raw = jsonload(apiurl+'getLastEdited?'+urlencode(data))['data']['lastEdited']
|
||||
lastedited_iso = datetime.fromtimestamp(int(lastedited_raw)/1000).isoformat()
|
||||
author_ids = jsonload(apiurl+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
|
||||
lastedited_raw = jsonload(apiurl + 'getLastEdited?' + urlencode(data))[
|
||||
'data'
|
||||
]['lastEdited']
|
||||
lastedited_iso = datetime.fromtimestamp(
|
||||
int(lastedited_raw) / 1000
|
||||
).isoformat()
|
||||
author_ids = jsonload(apiurl + 'listAuthorsOfPad?' + urlencode(data))[
|
||||
'data'
|
||||
]['authorIDs']
|
||||
author_ids = " ".join(author_ids).encode("utf-8")
|
||||
out.writerow((padidnogroup.encode("utf-8"), groupname.encode("utf-8"), revisions, lastedited_iso, author_ids))
|
||||
out.writerow(
|
||||
(
|
||||
padidnogroup.encode("utf-8"),
|
||||
groupname.encode("utf-8"),
|
||||
revisions,
|
||||
lastedited_iso,
|
||||
author_ids,
|
||||
)
|
||||
)
|
||||
count += 1
|
||||
|
||||
print("\nWrote {0} rows...".format(count), file=sys.stderr)
|
||||
|
||||
|
@ -1,18 +1,27 @@
|
||||
|
||||
from argparse import ArgumentParser
|
||||
import json
|
||||
from argparse import ArgumentParser
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import urlopen
|
||||
from urllib.error import HTTPError, URLError
|
||||
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("calls the getHTML API function for the given padid")
|
||||
p.add_argument("padid", help="the padid")
|
||||
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
|
||||
p.add_argument(
|
||||
"--padinfo",
|
||||
default=".etherpump/settings.json",
|
||||
help="settings, default: .etherdump/settings.json",
|
||||
)
|
||||
p.add_argument("--showurl", default=False, action="store_true")
|
||||
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
|
||||
p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
|
||||
p.add_argument(
|
||||
"--format",
|
||||
default="text",
|
||||
help="output format, can be: text, json; default: text",
|
||||
)
|
||||
p.add_argument(
|
||||
"--rev", type=int, default=None, help="revision, default: latest"
|
||||
)
|
||||
args = p.parse_args(args)
|
||||
|
||||
with open(args.padinfo) as f:
|
||||
@ -24,12 +33,12 @@ def main(args):
|
||||
data['padID'] = args.padid
|
||||
if args.rev != None:
|
||||
data['rev'] = args.rev
|
||||
requesturl = apiurl+'getHTML?'+urlencode(data)
|
||||
requesturl = apiurl + 'getHTML?' + urlencode(data)
|
||||
if args.showurl:
|
||||
print (requesturl)
|
||||
print(requesturl)
|
||||
else:
|
||||
results = json.load(urlopen(requesturl))['data']
|
||||
if args.format == "json":
|
||||
print (json.dumps(results))
|
||||
print(json.dumps(results))
|
||||
else:
|
||||
print (results['html'].encode("utf-8"))
|
||||
print(results['html'].encode("utf-8"))
|
||||
|
@ -1,17 +1,27 @@
|
||||
|
||||
import json
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
import json, sys
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import urlopen, URLError, HTTPError
|
||||
from urllib.request import HTTPError, URLError, urlopen
|
||||
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("calls the getText API function for the given padid")
|
||||
p.add_argument("padid", help="the padid")
|
||||
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
|
||||
p.add_argument(
|
||||
"--padinfo",
|
||||
default=".etherpump/settings.json",
|
||||
help="settings, default: .etherdump/settings.json",
|
||||
)
|
||||
p.add_argument("--showurl", default=False, action="store_true")
|
||||
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
|
||||
p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
|
||||
p.add_argument(
|
||||
"--format",
|
||||
default="text",
|
||||
help="output format, can be: text, json; default: text",
|
||||
)
|
||||
p.add_argument(
|
||||
"--rev", type=int, default=None, help="revision, default: latest"
|
||||
)
|
||||
args = p.parse_args(args)
|
||||
|
||||
with open(args.padinfo) as f:
|
||||
@ -20,18 +30,18 @@ def main(args):
|
||||
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
|
||||
data = {}
|
||||
data['apikey'] = info['apikey']
|
||||
data['padID'] = args.padid # is utf-8 encoded
|
||||
data['padID'] = args.padid # is utf-8 encoded
|
||||
if args.rev != None:
|
||||
data['rev'] = args.rev
|
||||
requesturl = apiurl+'getText?'+urlencode(data)
|
||||
requesturl = apiurl + 'getText?' + urlencode(data)
|
||||
if args.showurl:
|
||||
print (requesturl)
|
||||
print(requesturl)
|
||||
else:
|
||||
resp = urlopen(requesturl).read()
|
||||
resp = resp.decode("utf-8")
|
||||
results = json.loads(resp)
|
||||
if args.format == "json":
|
||||
print (json.dumps(results))
|
||||
print(json.dumps(results))
|
||||
else:
|
||||
if results['data']:
|
||||
sys.stdout.write(results['data']['text'])
|
||||
|
@ -1,28 +1,31 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
from html5lib import parse
|
||||
import os, sys
|
||||
import os
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
from xml.etree import ElementTree as ET
|
||||
from xml.etree import ElementTree as ET
|
||||
|
||||
from html5lib import parse
|
||||
|
||||
|
||||
def etree_indent(elem, level=0):
|
||||
i = "\n" + level*" "
|
||||
i = "\n" + level * " "
|
||||
if len(elem):
|
||||
if not elem.text or not elem.text.strip():
|
||||
elem.text = i + " "
|
||||
if not elem.tail or not elem.tail.strip():
|
||||
elem.tail = i
|
||||
for elem in elem:
|
||||
etree_indent(elem, level+1)
|
||||
etree_indent(elem, level + 1)
|
||||
if not elem.tail or not elem.tail.strip():
|
||||
elem.tail = i
|
||||
else:
|
||||
if level and (not elem.tail or not elem.tail.strip()):
|
||||
elem.tail = i
|
||||
|
||||
def get_link_type (url):
|
||||
|
||||
def get_link_type(url):
|
||||
lurl = url.lower()
|
||||
if lurl.endswith(".html") or lurl.endswith(".htm"):
|
||||
return "text/html"
|
||||
@ -37,13 +40,17 @@ def get_link_type (url):
|
||||
elif lurl.endswith(".js") or lurl.endswith(".jsonp"):
|
||||
return "text/javascript"
|
||||
|
||||
def pluralize (x):
|
||||
|
||||
def pluralize(x):
|
||||
if type(x) == list or type(x) == tuple:
|
||||
return x
|
||||
else:
|
||||
return (x,)
|
||||
|
||||
def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, indent=False):
|
||||
|
||||
def html5tidy(
|
||||
doc, charset="utf-8", title=None, scripts=None, links=None, indent=False
|
||||
):
|
||||
if scripts:
|
||||
script_srcs = [x.attrib.get("src") for x in doc.findall(".//script")]
|
||||
for src in pluralize(scripts):
|
||||
@ -56,21 +63,30 @@ def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, inden
|
||||
for elt in doc.findall(".//link"):
|
||||
href = elt.attrib.get("href")
|
||||
if href:
|
||||
existinglinks[href] = elt
|
||||
existinglinks[href] = elt
|
||||
for link in links:
|
||||
linktype = link.get("type") or get_link_type(link["href"])
|
||||
if link["href"] in existinglinks:
|
||||
elt = existinglinks[link["href"]]
|
||||
elt.attrib["rel"] = link["rel"]
|
||||
else:
|
||||
elt = ET.SubElement(doc.find(".//head"), "link", href=link["href"], rel=link["rel"])
|
||||
elt = ET.SubElement(
|
||||
doc.find(".//head"),
|
||||
"link",
|
||||
href=link["href"],
|
||||
rel=link["rel"],
|
||||
)
|
||||
if linktype:
|
||||
elt.attrib["type"] = linktype
|
||||
elt.attrib["type"] = linktype
|
||||
if "title" in link:
|
||||
elt.attrib["title"] = link["title"]
|
||||
|
||||
if charset:
|
||||
meta_charsets = [x.attrib.get("charset") for x in doc.findall(".//meta") if x.attrib.get("charset") != None]
|
||||
meta_charsets = [
|
||||
x.attrib.get("charset")
|
||||
for x in doc.findall(".//meta")
|
||||
if x.attrib.get("charset") != None
|
||||
]
|
||||
if not meta_charsets:
|
||||
meta = ET.SubElement(doc.find(".//head"), "meta", charset=charset)
|
||||
|
||||
@ -79,33 +95,89 @@ def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, inden
|
||||
if not titleelt:
|
||||
titleelt = ET.SubElement(doc.find(".//head"), "title")
|
||||
titleelt.text = title
|
||||
|
||||
|
||||
if indent:
|
||||
etree_indent(doc)
|
||||
return doc
|
||||
|
||||
def main (args):
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("")
|
||||
p.add_argument("input", nargs="?", default=None)
|
||||
p.add_argument("--indent", default=False, action="store_true")
|
||||
p.add_argument("--mogrify", default=False, action="store_true", help="modify file in place")
|
||||
p.add_argument("--method", default="html", help="method, default: html, values: html, xml, text")
|
||||
p.add_argument(
|
||||
"--mogrify",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="modify file in place",
|
||||
)
|
||||
p.add_argument(
|
||||
"--method",
|
||||
default="html",
|
||||
help="method, default: html, values: html, xml, text",
|
||||
)
|
||||
p.add_argument("--output", default=None, help="")
|
||||
p.add_argument("--title", default=None, help="ensure/add title tag in head")
|
||||
p.add_argument("--charset", default="utf-8", help="ensure/add meta tag with charset")
|
||||
p.add_argument("--script", action="append", default=[], help="ensure/add script tag")
|
||||
p.add_argument(
|
||||
"--charset", default="utf-8", help="ensure/add meta tag with charset"
|
||||
)
|
||||
p.add_argument(
|
||||
"--script", action="append", default=[], help="ensure/add script tag"
|
||||
)
|
||||
# <link>s, see https://www.w3.org/TR/html5/links.html#links
|
||||
p.add_argument("--stylesheet", action="append", default=[], help="ensure/add style link")
|
||||
p.add_argument("--alternate", action="append", default=[], nargs="+", help="ensure/add alternate links (optionally followed by a title and type)")
|
||||
p.add_argument("--next", action="append", default=[], nargs="+", help="ensure/add alternate link")
|
||||
p.add_argument("--prev", action="append", default=[], nargs="+", help="ensure/add alternate link")
|
||||
p.add_argument("--search", action="append", default=[], nargs="+", help="ensure/add search link")
|
||||
p.add_argument("--rss", action="append", default=[], nargs="+", help="ensure/add alternate link of type application/rss+xml")
|
||||
p.add_argument("--atom", action="append", default=[], nargs="+", help="ensure/add alternate link of type application/atom+xml")
|
||||
p.add_argument(
|
||||
"--stylesheet",
|
||||
action="append",
|
||||
default=[],
|
||||
help="ensure/add style link",
|
||||
)
|
||||
p.add_argument(
|
||||
"--alternate",
|
||||
action="append",
|
||||
default=[],
|
||||
nargs="+",
|
||||
help="ensure/add alternate links (optionally followed by a title and type)",
|
||||
)
|
||||
p.add_argument(
|
||||
"--next",
|
||||
action="append",
|
||||
default=[],
|
||||
nargs="+",
|
||||
help="ensure/add alternate link",
|
||||
)
|
||||
p.add_argument(
|
||||
"--prev",
|
||||
action="append",
|
||||
default=[],
|
||||
nargs="+",
|
||||
help="ensure/add alternate link",
|
||||
)
|
||||
p.add_argument(
|
||||
"--search",
|
||||
action="append",
|
||||
default=[],
|
||||
nargs="+",
|
||||
help="ensure/add search link",
|
||||
)
|
||||
p.add_argument(
|
||||
"--rss",
|
||||
action="append",
|
||||
default=[],
|
||||
nargs="+",
|
||||
help="ensure/add alternate link of type application/rss+xml",
|
||||
)
|
||||
p.add_argument(
|
||||
"--atom",
|
||||
action="append",
|
||||
default=[],
|
||||
nargs="+",
|
||||
help="ensure/add alternate link of type application/atom+xml",
|
||||
)
|
||||
|
||||
args = p.parse_args(args)
|
||||
links = []
|
||||
def add_links (links, items, rel, _type=None):
|
||||
|
||||
def add_links(links, items, rel, _type=None):
|
||||
for href in items:
|
||||
d = {}
|
||||
d["rel"] = rel
|
||||
@ -128,6 +200,7 @@ def main (args):
|
||||
d["href"] = href
|
||||
|
||||
links.append(d)
|
||||
|
||||
for rel in ("stylesheet", "alternate", "next", "prev", "search"):
|
||||
add_links(links, getattr(args, rel), rel)
|
||||
for item in args.rss:
|
||||
@ -144,27 +217,33 @@ def main (args):
|
||||
doc = parse(fin, treebuilder="etree", namespaceHTMLElements=False)
|
||||
if fin != sys.stdin:
|
||||
fin.close()
|
||||
html5tidy(doc, scripts=args.script, links=links, title=args.title, indent=args.indent)
|
||||
html5tidy(
|
||||
doc,
|
||||
scripts=args.script,
|
||||
links=links,
|
||||
title=args.title,
|
||||
indent=args.indent,
|
||||
)
|
||||
|
||||
# OUTPUT
|
||||
tmppath = None
|
||||
if args.output:
|
||||
fout = open(args.output, "w")
|
||||
elif args.mogrify:
|
||||
tmppath = args.input+".tmp"
|
||||
tmppath = args.input + ".tmp"
|
||||
fout = open(tmppath, "w")
|
||||
else:
|
||||
fout = sys.stdout
|
||||
|
||||
print (ET.tostring(doc, method=args.method, encoding="unicode"), file=fout)
|
||||
print(ET.tostring(doc, method=args.method, encoding="unicode"), file=fout)
|
||||
|
||||
if fout != sys.stdout:
|
||||
fout.close()
|
||||
|
||||
if tmppath:
|
||||
os.rename(args.input, args.input+"~")
|
||||
os.rename(args.input, args.input + "~")
|
||||
os.rename(tmppath, args.input)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv)
|
||||
|
@ -1,16 +1,19 @@
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from argparse import ArgumentParser
|
||||
import sys, json, re, os, time
|
||||
from datetime import datetime
|
||||
import dateutil.parser
|
||||
|
||||
from urllib.parse import urlparse, urlunparse, urlencode, quote
|
||||
from urllib.request import urlopen, URLError, HTTPError
|
||||
|
||||
from jinja2 import FileSystemLoader, Environment
|
||||
from etherpump.commands.common import *
|
||||
from time import sleep
|
||||
from urllib.parse import quote, urlencode, urlparse, urlunparse
|
||||
from urllib.request import HTTPError, URLError, urlopen
|
||||
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
import dateutil.parser
|
||||
from etherpump.commands.common import *
|
||||
|
||||
|
||||
"""
|
||||
index:
|
||||
@ -20,7 +23,8 @@ index:
|
||||
|
||||
"""
|
||||
|
||||
def group (items, key=lambda x: x):
|
||||
|
||||
def group(items, key=lambda x: x):
|
||||
""" returns a list of lists, of items grouped by a key function """
|
||||
ret = []
|
||||
keys = {}
|
||||
@ -34,10 +38,12 @@ def group (items, key=lambda x: x):
|
||||
ret.append(keys[k])
|
||||
return ret
|
||||
|
||||
|
||||
# def base (x):
|
||||
# return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
|
||||
|
||||
def splitextlong (x):
|
||||
|
||||
def splitextlong(x):
|
||||
""" split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz') """
|
||||
m = re.search(r"^(.*?)(\..*)$", x)
|
||||
if m:
|
||||
@ -45,20 +51,24 @@ def splitextlong (x):
|
||||
else:
|
||||
return x, ''
|
||||
|
||||
def base (x):
|
||||
|
||||
def base(x):
|
||||
return splitextlong(x)[0]
|
||||
|
||||
def excerpt (t, chars=25):
|
||||
|
||||
def excerpt(t, chars=25):
|
||||
if len(t) > chars:
|
||||
t = t[:chars] + "..."
|
||||
return t
|
||||
|
||||
def absurl (url, base=None):
|
||||
|
||||
def absurl(url, base=None):
|
||||
if not url.startswith("http"):
|
||||
return base + url
|
||||
return url
|
||||
|
||||
def url_base (url):
|
||||
|
||||
def url_base(url):
|
||||
(scheme, netloc, path, params, query, fragment) = urlparse(url)
|
||||
path, _ = os.path.split(path.lstrip("/"))
|
||||
ret = urlunparse((scheme, netloc, path, None, None, None))
|
||||
@ -66,45 +76,131 @@ def url_base (url):
|
||||
ret += "/"
|
||||
return ret
|
||||
|
||||
def datetimeformat (t, format='%Y-%m-%d %H:%M:%S'):
|
||||
|
||||
def datetimeformat(t, format='%Y-%m-%d %H:%M:%S'):
|
||||
if type(t) == str:
|
||||
dt = dateutil.parser.parse(t)
|
||||
return dt.strftime(format)
|
||||
else:
|
||||
return time.strftime(format, time.localtime(t))
|
||||
|
||||
def main (args):
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("Convert dumped files to a document via a template.")
|
||||
|
||||
p.add_argument("input", nargs="+", help="Files to list (.meta.json files)")
|
||||
|
||||
p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in")
|
||||
p.add_argument("--template", default="index.html", help="template name, built-ins include index.html, rss.xml; default: index.html")
|
||||
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: ./.etherdump/settings.json")
|
||||
p.add_argument(
|
||||
"--templatepath",
|
||||
default=None,
|
||||
help="path to find templates, default: built-in",
|
||||
)
|
||||
p.add_argument(
|
||||
"--template",
|
||||
default="index.html",
|
||||
help="template name, built-ins include index.html, rss.xml; default: index.html",
|
||||
)
|
||||
p.add_argument(
|
||||
"--padinfo",
|
||||
default=".etherpump/settings.json",
|
||||
help="settings, default: ./.etherdump/settings.json",
|
||||
)
|
||||
# p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
|
||||
|
||||
p.add_argument("--order", default="padid", help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid")
|
||||
p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)")
|
||||
p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)")
|
||||
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
|
||||
p.add_argument(
|
||||
"--order",
|
||||
default="padid",
|
||||
help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid",
|
||||
)
|
||||
p.add_argument(
|
||||
"--reverse",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="reverse order, default: False (reverse chrono)",
|
||||
)
|
||||
p.add_argument(
|
||||
"--limit",
|
||||
type=int,
|
||||
default=0,
|
||||
help="limit to number of items, default: 0 (no limit)",
|
||||
)
|
||||
p.add_argument(
|
||||
"--skip",
|
||||
default=None,
|
||||
type=int,
|
||||
help="skip this many items, default: None",
|
||||
)
|
||||
|
||||
p.add_argument("--content", default=False, action="store_true", help="rss: include (full) content tag, default: False")
|
||||
p.add_argument("--link", default="diffhtml,html,text", help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text")
|
||||
p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
|
||||
p.add_argument(
|
||||
"--content",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="rss: include (full) content tag, default: False",
|
||||
)
|
||||
p.add_argument(
|
||||
"--link",
|
||||
default="diffhtml,html,text",
|
||||
help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text",
|
||||
)
|
||||
p.add_argument(
|
||||
"--linkbase",
|
||||
default=None,
|
||||
help="base url to use for links, default: try to use the feedurl",
|
||||
)
|
||||
p.add_argument("--output", default=None, help="output, default: stdout")
|
||||
|
||||
p.add_argument("--files", default=False, action="store_true", help="include files (experimental)")
|
||||
p.add_argument(
|
||||
"--files",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="include files (experimental)",
|
||||
)
|
||||
|
||||
pg = p.add_argument_group('template variables')
|
||||
pg.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml")
|
||||
pg.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url")
|
||||
pg.add_argument("--title", default="etherpump", help="title for document or rss feed channel title, default: etherdump")
|
||||
pg.add_argument("--description", default="", help="rss: channel description, default: empty")
|
||||
pg.add_argument("--language", default="en-US", help="rss: feed language, default: en-US")
|
||||
pg.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
|
||||
pg.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1")
|
||||
pg.add_argument("--generator", default="https://gitlab.com/activearchives/etherpump", help="generator, default: https://gitlab.com/activearchives/etherdump")
|
||||
pg.add_argument("--timestamp", default=None, help="timestamp, default: now (e.g. 2015-12-01 12:30:00)")
|
||||
pg.add_argument(
|
||||
"--feedurl",
|
||||
default="feed.xml",
|
||||
help="rss: to use as feeds own (self) link, default: feed.xml",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--siteurl",
|
||||
default=None,
|
||||
help="rss: to use as channel's site link, default: the etherpad url",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--title",
|
||||
default="etherpump",
|
||||
help="title for document or rss feed channel title, default: etherdump",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--description",
|
||||
default="",
|
||||
help="rss: channel description, default: empty",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--language", default="en-US", help="rss: feed language, default: en-US"
|
||||
)
|
||||
pg.add_argument(
|
||||
"--updatePeriod",
|
||||
default="daily",
|
||||
help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--updateFrequency",
|
||||
default=1,
|
||||
type=int,
|
||||
help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--generator",
|
||||
default="https://gitlab.com/activearchives/etherpump",
|
||||
help="generator, default: https://gitlab.com/activearchives/etherdump",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--timestamp",
|
||||
default=None,
|
||||
help="timestamp, default: now (e.g. 2015-12-01 12:30:00)",
|
||||
)
|
||||
pg.add_argument("--next", default=None, help="next link, default: None)")
|
||||
pg.add_argument("--prev", default=None, help="prev link, default: None")
|
||||
|
||||
@ -129,17 +225,12 @@ def main (args):
|
||||
# Use "base" to strip (longest) extensions
|
||||
# inputs = group(inputs, base)
|
||||
|
||||
def wrappath(p):
    """Describe the local file *p* as a pad "version" entry.

    Returns a dict in which "url" and "path" are both *p* prefixed with
    "./", "code" is 200, and "type" is the file extension without its
    leading dot (an empty string when *p* has no extension).
    """
    path = "./{0}".format(p)
    _, dotted_ext = os.path.splitext(p)
    return {"url": path, "path": path, "code": 200, "type": dotted_ext[1:]}
|
||||
|
||||
def metaforpaths (paths):
|
||||
def metaforpaths(paths):
|
||||
ret = {}
|
||||
pid = base(paths[0])
|
||||
ret['pad'] = ret['padid'] = pid
|
||||
@ -149,7 +240,9 @@ def main (args):
|
||||
mtime = os.stat(p).st_mtime
|
||||
if lastedited == None or mtime > lastedited:
|
||||
lastedited = mtime
|
||||
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime("%Y-%m-%dT%H:%M:%S")
|
||||
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime(
|
||||
"%Y-%m-%dT%H:%M:%S"
|
||||
)
|
||||
ret["lastedited_raw"] = mtime
|
||||
return ret
|
||||
|
||||
@ -169,7 +262,7 @@ def main (args):
|
||||
# else:
|
||||
# return metaforpaths(paths)
|
||||
|
||||
def fixdates (padmeta):
|
||||
def fixdates(padmeta):
|
||||
d = dateutil.parser.parse(padmeta["lastedited_iso"])
|
||||
padmeta["lastedited"] = d
|
||||
padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000")
|
||||
@ -180,17 +273,21 @@ def main (args):
|
||||
pads = list(map(fixdates, pads))
|
||||
args.pads = list(pads)
|
||||
|
||||
def could_have_base(x, y):
    """Tell whether the file name *x* could belong to the base name *y*.

    True when *x* equals *y*, or when *x* is *y* immediately followed by a
    "." (so "foo.diff.html" matches "foo" but "foobar.html" does not).
    """
    return x == y or x.startswith(y + ".")
|
||||
|
||||
def get_best_pad(x):
    """Return the first pad whose base name could be the base of *x*.

    Walks ``padbases`` in order and hands back the corresponding entry of
    ``pads_by_base`` for the first base accepted by ``could_have_base``.
    Returns None when no base matches.
    """
    for candidate_base in padbases:
        pad = pads_by_base[candidate_base]
        if not could_have_base(x, candidate_base):
            continue
        return pad
    return None
|
||||
|
||||
def has_version(padinfo, path):
    """Return the entries of ``padinfo['versions']`` that point at *path*.

    A version matches when it has a 'path' key equal to "./" + *path*.
    The result is a (possibly empty) list, so it can be used directly as
    a truth value.
    """
    # Build the comparison target once instead of once per version.
    target = "./" + path
    return [
        version
        for version in padinfo['versions']
        if 'path' in version and version['path'] == target
    ]
|
||||
|
||||
if args.files:
|
||||
inputs = args.input
|
||||
@ -208,25 +305,33 @@ def main (args):
|
||||
# print ("PADBASES", file=sys.stderr)
|
||||
# for pb in padbases:
|
||||
# print (" ", pb, file=sys.stderr)
|
||||
print ("pairing input files with pads", file=sys.stderr)
|
||||
print("pairing input files with pads", file=sys.stderr)
|
||||
for x in inputs:
|
||||
# pair input with a pad if possible
|
||||
xbasename = os.path.basename(x)
|
||||
p = get_best_pad(xbasename)
|
||||
if p:
|
||||
if not has_version(p, x):
|
||||
print ("Grouping file {0} with pad {1}".format(x, p['padid']), file=sys.stderr)
|
||||
print(
|
||||
"Grouping file {0} with pad {1}".format(x, p['padid']),
|
||||
file=sys.stderr,
|
||||
)
|
||||
p['versions'].append(wrappath(x))
|
||||
else:
|
||||
print ("Skipping existing version {0} ({1})...".format(x, p['padid']), file=sys.stderr)
|
||||
print(
|
||||
"Skipping existing version {0} ({1})...".format(
|
||||
x, p['padid']
|
||||
),
|
||||
file=sys.stderr,
|
||||
)
|
||||
removelist.append(x)
|
||||
# Removed Matches files
|
||||
for x in removelist:
|
||||
inputs.remove(x)
|
||||
print ("Remaining files:", file=sys.stderr)
|
||||
print("Remaining files:", file=sys.stderr)
|
||||
for x in inputs:
|
||||
print (x, file=sys.stderr)
|
||||
print (file=sys.stderr)
|
||||
print(x, file=sys.stderr)
|
||||
print(file=sys.stderr)
|
||||
# Add "fake" pads for remaining files
|
||||
for x in inputs:
|
||||
args.pads.append(metaforpaths([x]))
|
||||
@ -242,7 +347,9 @@ def main (args):
|
||||
|
||||
# order items & apply limit
|
||||
if args.order == "lastedited":
|
||||
args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=args.reverse)
|
||||
args.pads.sort(
|
||||
key=lambda x: x.get("lastedited_iso"), reverse=args.reverse
|
||||
)
|
||||
elif args.order == "pad":
|
||||
args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse)
|
||||
elif args.order == "padid":
|
||||
@ -250,12 +357,14 @@ def main (args):
|
||||
elif args.order == "revisions":
|
||||
args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse)
|
||||
elif args.order == "authors":
|
||||
args.pads.sort(key=lambda x: len(x.get("authors")), reverse=args.reverse)
|
||||
args.pads.sort(
|
||||
key=lambda x: len(x.get("authors")), reverse=args.reverse
|
||||
)
|
||||
else:
|
||||
raise Exception("That ordering is not implemented!")
|
||||
|
||||
if args.limit:
|
||||
args.pads = args.pads[:args.limit]
|
||||
args.pads = args.pads[: args.limit]
|
||||
|
||||
# add versions_by_type, add in full text
|
||||
# add link (based on args.link)
|
||||
@ -272,7 +381,7 @@ def main (args):
|
||||
|
||||
if "text" in versions_by_type:
|
||||
try:
|
||||
with open (versions_by_type["text"]["path"]) as f:
|
||||
with open(versions_by_type["text"]["path"]) as f:
|
||||
p["text"] = f.read()
|
||||
except FileNotFoundError:
|
||||
p['text'] = ''
|
||||
@ -289,6 +398,6 @@ def main (args):
|
||||
|
||||
if args.output:
|
||||
with open(args.output, "w") as f:
|
||||
print (template.render(vars(args)), file=f)
|
||||
print(template.render(vars(args)), file=f)
|
||||
else:
|
||||
print (template.render(vars(args)))
|
||||
print(template.render(vars(args)))
|
||||
|
@ -1,19 +1,19 @@
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
from urllib.parse import urlencode, urlparse, urlunparse
|
||||
from urllib.request import HTTPError, URLError, urlopen
|
||||
|
||||
from urllib.parse import urlparse, urlunparse, urlencode
|
||||
from urllib.request import urlopen, URLError, HTTPError
|
||||
|
||||
import json, os, sys
|
||||
|
||||
def get_api(url, cmd=None, data=None, verbose=False):
|
||||
try:
|
||||
useurl = url+cmd
|
||||
useurl = url + cmd
|
||||
if data:
|
||||
useurl += "?"+urlencode(data)
|
||||
useurl += "?" + urlencode(data)
|
||||
# data['apikey'] = "7c8faa070c97f83d8f705c935a32d5141f89cbaa2158042fa92e8ddad5dbc5e1"
|
||||
if verbose:
|
||||
print ("trying", useurl, file=sys.stderr)
|
||||
print("trying", useurl, file=sys.stderr)
|
||||
resp = urlopen(useurl).read()
|
||||
resp = resp.decode("utf-8")
|
||||
resp = json.loads(resp)
|
||||
@ -21,11 +21,11 @@ def get_api(url, cmd=None, data=None, verbose=False):
|
||||
return resp
|
||||
except ValueError as e:
|
||||
if verbose:
|
||||
print (" ValueError", e, file=sys.stderr)
|
||||
print(" ValueError", e, file=sys.stderr)
|
||||
return
|
||||
except HTTPError as e:
|
||||
if verbose:
|
||||
print (" HTTPError", e, file=sys.stderr)
|
||||
print(" HTTPError", e, file=sys.stderr)
|
||||
if e.code == 401:
|
||||
# Unauthorized is how the API responds to an incorrect API key
|
||||
return {"code": 401, "message": e}
|
||||
@ -34,7 +34,8 @@ def get_api(url, cmd=None, data=None, verbose=False):
|
||||
# # print ("returning", resp, file=sys.stderr)
|
||||
# return resp
|
||||
|
||||
def tryapiurl (url, verbose=False):
|
||||
|
||||
def tryapiurl(url, verbose=False):
|
||||
"""
|
||||
Try to use url as api, correcting if possible.
|
||||
Returns corrected / normalized URL, or None if not possible
|
||||
@ -47,22 +48,30 @@ def tryapiurl (url, verbose=False):
|
||||
params, query, fragment = ("", "", "")
|
||||
path = path.strip("/")
|
||||
# 1. try directly...
|
||||
apiurl = urlunparse((scheme, netloc, path, params, query, fragment))+"/"
|
||||
apiurl = (
|
||||
urlunparse((scheme, netloc, path, params, query, fragment)) + "/"
|
||||
)
|
||||
if get_api(apiurl, "listAllPads", verbose=verbose):
|
||||
return apiurl
|
||||
# 2. try with += api/1.2.9
|
||||
path = os.path.join(path, "api", "1.2.9")+"/"
|
||||
path = os.path.join(path, "api", "1.2.9") + "/"
|
||||
apiurl = urlunparse((scheme, netloc, path, params, query, fragment))
|
||||
if get_api(apiurl, "listAllPads", verbose=verbose):
|
||||
return apiurl
|
||||
# except ValueError as e:
|
||||
# print ("ValueError", e, file=sys.stderr)
|
||||
except URLError as e:
|
||||
print ("URLError", e, file=sys.stderr)
|
||||
print("URLError", e, file=sys.stderr)
|
||||
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("initialize an etherpump folder")
|
||||
p.add_argument("arg", nargs="*", default=[], help="optional positional args: path etherpadurl")
|
||||
p.add_argument(
|
||||
"arg",
|
||||
nargs="*",
|
||||
default=[],
|
||||
help="optional positional args: path etherpadurl",
|
||||
)
|
||||
p.add_argument("--path", default=None, help="path to initialize")
|
||||
p.add_argument("--padurl", default=None, help="")
|
||||
p.add_argument("--apikey", default=None, help="")
|
||||
@ -70,7 +79,6 @@ def main(args):
|
||||
p.add_argument("--reinit", default=False, action="store_true", help="")
|
||||
args = p.parse_args(args)
|
||||
|
||||
|
||||
path = args.path
|
||||
if path == None and len(args.arg):
|
||||
path = args.arg[0]
|
||||
@ -89,7 +97,9 @@ def main(args):
|
||||
with open(padinfopath) as f:
|
||||
padinfo = json.load(f)
|
||||
if not args.reinit:
|
||||
print ("Folder is already initialized. Use --reinit to reset settings.")
|
||||
print(
|
||||
"Folder is already initialized. Use --reinit to reset settings."
|
||||
)
|
||||
sys.exit(0)
|
||||
except IOError:
|
||||
pass
|
||||
@ -100,7 +110,7 @@ def main(args):
|
||||
apiurl = args.padurl
|
||||
while True:
|
||||
if apiurl:
|
||||
apiurl = tryapiurl(apiurl,verbose=args.verbose)
|
||||
apiurl = tryapiurl(apiurl, verbose=args.verbose)
|
||||
if apiurl:
|
||||
# print ("Got APIURL: {0}".format(apiurl))
|
||||
break
|
||||
@ -109,13 +119,18 @@ def main(args):
|
||||
apikey = args.apikey
|
||||
while True:
|
||||
if apikey:
|
||||
resp = get_api(apiurl, "listAllPads", {"apikey": apikey}, verbose=args.verbose)
|
||||
resp = get_api(
|
||||
apiurl, "listAllPads", {"apikey": apikey}, verbose=args.verbose
|
||||
)
|
||||
if resp and resp["code"] == 0:
|
||||
# print ("GOOD")
|
||||
break
|
||||
else:
|
||||
print ("bad")
|
||||
print ("The APIKEY is the contents of the file APIKEY.txt in the etherpad folder", file=sys.stderr)
|
||||
print("bad")
|
||||
print(
|
||||
"The APIKEY is the contents of the file APIKEY.txt in the etherpad folder",
|
||||
file=sys.stderr,
|
||||
)
|
||||
apikey = input("Please paste the APIKEY: ").strip()
|
||||
padinfo["apikey"] = apikey
|
||||
|
||||
|
@ -1,11 +1,13 @@
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from argparse import ArgumentParser
|
||||
import json, os, re
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import urlopen
|
||||
from urllib.error import HTTPError, URLError
|
||||
|
||||
def group (items, key=lambda x: x):
|
||||
|
||||
def group(items, key=lambda x: x):
|
||||
ret = []
|
||||
keys = {}
|
||||
for item in items:
|
||||
@ -18,6 +20,7 @@ def group (items, key=lambda x: x):
|
||||
ret.append(keys[k])
|
||||
return ret
|
||||
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("")
|
||||
p.add_argument("input", nargs="+", help="filenames")
|
||||
@ -28,10 +31,11 @@ def main(args):
|
||||
|
||||
inputs = [x for x in inputs if not os.path.isdir(x)]
|
||||
|
||||
def base(x):
    """Strip a known pad-file extension from the end of the name *x*.

    Removes a trailing ".diff.html", ".meta.json", ".html" or ".txt";
    names ending in anything else are returned unchanged.
    """
    # The whole alternation is grouped so that "$" anchors every
    # extension, not just the last one; longer extensions come first so
    # ".diff.html" is removed as a unit.
    return re.sub(r"(\.diff\.html|\.meta\.json|\.html|\.txt)$", "", x)
|
||||
#from pprint import pprint
|
||||
#pprint()
|
||||
|
||||
# from pprint import pprint
|
||||
# pprint()
|
||||
gg = group(inputs, base)
|
||||
for items in gg:
|
||||
itembase = base(items[0])
|
||||
@ -41,5 +45,5 @@ def main(args):
|
||||
pass
|
||||
for i in items:
|
||||
newloc = os.path.join(itembase, i)
|
||||
print ("'{0}' => '{1}'".format(i, newloc))
|
||||
print("'{0}' => '{1}'".format(i, newloc))
|
||||
os.rename(i, newloc)
|
||||
|
@ -1,31 +1,40 @@
|
||||
|
||||
from argparse import ArgumentParser
|
||||
import json
|
||||
import sys
|
||||
from etherpump.commands.common import getjson
|
||||
from urllib.parse import urlparse, urlunparse, urlencode
|
||||
from urllib.request import urlopen, URLError, HTTPError
|
||||
from argparse import ArgumentParser
|
||||
from urllib.parse import urlencode, urlparse, urlunparse
|
||||
from urllib.request import HTTPError, URLError, urlopen
|
||||
|
||||
def main (args):
|
||||
from etherpump.commands.common import getjson
|
||||
|
||||
|
||||
def main(args):
    """Call the etherpad ``listAllPads`` API and print the pad ids.

    *args* is the list of command-line argument strings for this command.
    The API url and key are read from the JSON settings file given by
    ``--padinfo``.  With ``--showurl`` only the request URL is printed;
    otherwise the pad ids are fetched and printed one per line, or as a
    JSON list when ``--format json`` is given.
    """
    # Single source for the default so the help text cannot drift from it.
    default_padinfo = ".etherpump/settings.json"

    p = ArgumentParser("call listAllPads and print the results")
    p.add_argument(
        "--padinfo",
        default=default_padinfo,
        help="settings, default: {0}".format(default_padinfo),
    )
    p.add_argument("--showurl", default=False, action="store_true")
    p.add_argument(
        "--format",
        default="lines",
        help="output format: lines, json; default lines",
    )
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = info.get("apiurl")
    data = {}
    data['apikey'] = info['apikey']
    requesturl = apiurl + 'listAllPads?' + urlencode(data)
    if args.showurl:
        print(requesturl)
    else:
        results = getjson(requesturl)['data']['padIDs']
        if args.format == "json":
            print(json.dumps(results))
        else:
            for r in results:
                print(r)
|
||||
|
@ -1,17 +1,24 @@
|
||||
|
||||
from argparse import ArgumentParser
|
||||
import json
|
||||
from argparse import ArgumentParser
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import urlopen
|
||||
from urllib.error import HTTPError, URLError
|
||||
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("call listAuthorsOfPad for the padid")
|
||||
p.add_argument("padid", help="the padid")
|
||||
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
|
||||
p.add_argument(
|
||||
"--padinfo",
|
||||
default=".etherpump/settings.json",
|
||||
help="settings, default: .etherdump/settings.json",
|
||||
)
|
||||
p.add_argument("--showurl", default=False, action="store_true")
|
||||
p.add_argument("--format", default="lines", help="output format, can be: lines, json; default: lines")
|
||||
p.add_argument(
|
||||
"--format",
|
||||
default="lines",
|
||||
help="output format, can be: lines, json; default: lines",
|
||||
)
|
||||
args = p.parse_args(args)
|
||||
|
||||
with open(args.padinfo) as f:
|
||||
@ -20,13 +27,13 @@ def main(args):
|
||||
data = {}
|
||||
data['apikey'] = info['apikey']
|
||||
data['padID'] = args.padid.encode("utf-8")
|
||||
requesturl = apiurl+'listAuthorsOfPad?'+urlencode(data)
|
||||
requesturl = apiurl + 'listAuthorsOfPad?' + urlencode(data)
|
||||
if args.showurl:
|
||||
print (requesturl)
|
||||
print(requesturl)
|
||||
else:
|
||||
results = json.load(urlopen(requesturl))['data']['authorIDs']
|
||||
if args.format == "json":
|
||||
print (json.dumps(results))
|
||||
print(json.dumps(results))
|
||||
else:
|
||||
for r in results:
|
||||
print (r.encode("utf-8"))
|
||||
print(r.encode("utf-8"))
|
||||
|
@ -1,17 +1,20 @@
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from argparse import ArgumentParser
|
||||
import sys, json, re, os, time
|
||||
from datetime import datetime
|
||||
from time import sleep
|
||||
from urllib.parse import quote, urlencode, urlparse, urlunparse
|
||||
from urllib.request import HTTPError, URLError, urlopen
|
||||
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
import dateutil.parser
|
||||
import pypandoc
|
||||
|
||||
from urllib.parse import urlparse, urlunparse, urlencode, quote
|
||||
from urllib.request import urlopen, URLError, HTTPError
|
||||
|
||||
from jinja2 import FileSystemLoader, Environment
|
||||
from etherpump.commands.common import *
|
||||
from time import sleep
|
||||
import dateutil.parser
|
||||
|
||||
|
||||
"""
|
||||
publication:
|
||||
@ -21,7 +24,8 @@ publication:
|
||||
|
||||
"""
|
||||
|
||||
def group (items, key=lambda x: x):
|
||||
|
||||
def group(items, key=lambda x: x):
|
||||
""" returns a list of lists, of items grouped by a key function """
|
||||
ret = []
|
||||
keys = {}
|
||||
@ -35,10 +39,12 @@ def group (items, key=lambda x: x):
|
||||
ret.append(keys[k])
|
||||
return ret
|
||||
|
||||
|
||||
# def base (x):
|
||||
# return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
|
||||
|
||||
def splitextlong (x):
|
||||
|
||||
def splitextlong(x):
|
||||
""" split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz') """
|
||||
m = re.search(r"^(.*?)(\..*)$", x)
|
||||
if m:
|
||||
@ -46,20 +52,24 @@ def splitextlong (x):
|
||||
else:
|
||||
return x, ''
|
||||
|
||||
def base(x):
    """ strip the "long" extension from x, i.e. foo.bar.baz => 'foo' """
    # splitextlong hands back (stem, extension); only the stem is wanted.
    stem_and_ext = splitextlong(x)
    return stem_and_ext[0]
|
||||
|
||||
def excerpt(t, chars=25, suffix="..."):
    """Shorten *t* to at most *chars* characters plus a trailing marker.

    Text that already fits within *chars* is returned unchanged.  Longer
    text is cut after *chars* characters and *suffix* is appended, so the
    result can be up to ``chars + len(suffix)`` long.

    *suffix* defaults to "..." and can be overridden, e.g. with a single
    ellipsis character.
    """
    if len(t) <= chars:
        return t
    return t[:chars] + suffix
|
||||
|
||||
def absurl(url, base=None):
    """Return *url* as an absolute URL, prefixing *base* when it is relative.

    A url that already starts with ``http://`` or ``https://`` is returned
    unchanged.  Anything else is treated as relative and gets *base*
    prepended by plain string concatenation (no path normalisation is
    done, so the caller supplies any trailing slash on *base*).

    When no *base* is given the url is returned as-is rather than failing
    on ``None + url``.
    """
    # Test the full scheme prefix: a bare "http" check would mistake a
    # relative name such as "http-notes.html" for an absolute URL.
    if base is None or url.startswith(("http://", "https://")):
        return url
    return base + url
|
||||
|
||||
def url_base (url):
|
||||
|
||||
def url_base(url):
|
||||
(scheme, netloc, path, params, query, fragment) = urlparse(url)
|
||||
path, _ = os.path.split(path.lstrip("/"))
|
||||
ret = urlunparse((scheme, netloc, path, None, None, None))
|
||||
@ -67,45 +77,131 @@ def url_base (url):
|
||||
ret += "/"
|
||||
return ret
|
||||
|
||||
def datetimeformat(t, format='%Y-%m-%d %H:%M:%S'):
    """Render the timestamp *t* using the strftime pattern *format*.

    *t* may be either a date string, which is parsed with
    ``dateutil.parser``, or a number of seconds since the epoch, which is
    rendered in local time.
    """
    # isinstance rather than an exact type comparison, so that str
    # subclasses are parsed as date strings too.
    if isinstance(t, str):
        return dateutil.parser.parse(t).strftime(format)
    return time.strftime(format, time.localtime(t))
|
||||
|
||||
def main (args):
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("Convert dumped files to a document via a template.")
|
||||
|
||||
p.add_argument("input", nargs="+", help="Files to list (.meta.json files)")
|
||||
|
||||
p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in")
|
||||
p.add_argument("--template", default="publication.html", help="template name, built-ins include publication.html; default: publication.html")
|
||||
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: ./.etherdump/settings.json")
|
||||
p.add_argument(
|
||||
"--templatepath",
|
||||
default=None,
|
||||
help="path to find templates, default: built-in",
|
||||
)
|
||||
p.add_argument(
|
||||
"--template",
|
||||
default="publication.html",
|
||||
help="template name, built-ins include publication.html; default: publication.html",
|
||||
)
|
||||
p.add_argument(
|
||||
"--padinfo",
|
||||
default=".etherpump/settings.json",
|
||||
help="settings, default: ./.etherdump/settings.json",
|
||||
)
|
||||
# p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
|
||||
|
||||
p.add_argument("--order", default="padid", help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid")
|
||||
p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)")
|
||||
p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)")
|
||||
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
|
||||
p.add_argument(
|
||||
"--order",
|
||||
default="padid",
|
||||
help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid",
|
||||
)
|
||||
p.add_argument(
|
||||
"--reverse",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="reverse order, default: False (reverse chrono)",
|
||||
)
|
||||
p.add_argument(
|
||||
"--limit",
|
||||
type=int,
|
||||
default=0,
|
||||
help="limit to number of items, default: 0 (no limit)",
|
||||
)
|
||||
p.add_argument(
|
||||
"--skip",
|
||||
default=None,
|
||||
type=int,
|
||||
help="skip this many items, default: None",
|
||||
)
|
||||
|
||||
p.add_argument("--content", default=False, action="store_true", help="rss: include (full) content tag, default: False")
|
||||
p.add_argument("--link", default="diffhtml,html,text", help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text")
|
||||
p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
|
||||
p.add_argument(
|
||||
"--content",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="rss: include (full) content tag, default: False",
|
||||
)
|
||||
p.add_argument(
|
||||
"--link",
|
||||
default="diffhtml,html,text",
|
||||
help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text",
|
||||
)
|
||||
p.add_argument(
|
||||
"--linkbase",
|
||||
default=None,
|
||||
help="base url to use for links, default: try to use the feedurl",
|
||||
)
|
||||
p.add_argument("--output", default=None, help="output, default: stdout")
|
||||
|
||||
p.add_argument("--files", default=False, action="store_true", help="include files (experimental)")
|
||||
p.add_argument(
|
||||
"--files",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="include files (experimental)",
|
||||
)
|
||||
|
||||
pg = p.add_argument_group('template variables')
|
||||
pg.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml")
|
||||
pg.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url")
|
||||
pg.add_argument("--title", default="etherpump", help="title for document or rss feed channel title, default: etherdump")
|
||||
pg.add_argument("--description", default="", help="rss: channel description, default: empty")
|
||||
pg.add_argument("--language", default="en-US", help="rss: feed language, default: en-US")
|
||||
pg.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
|
||||
pg.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1")
|
||||
pg.add_argument("--generator", default="https://gitlab.com/activearchives/etherpump", help="generator, default: https://gitlab.com/activearchives/etherdump")
|
||||
pg.add_argument("--timestamp", default=None, help="timestamp, default: now (e.g. 2015-12-01 12:30:00)")
|
||||
pg.add_argument(
|
||||
"--feedurl",
|
||||
default="feed.xml",
|
||||
help="rss: to use as feeds own (self) link, default: feed.xml",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--siteurl",
|
||||
default=None,
|
||||
help="rss: to use as channel's site link, default: the etherpad url",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--title",
|
||||
default="etherpump",
|
||||
help="title for document or rss feed channel title, default: etherdump",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--description",
|
||||
default="",
|
||||
help="rss: channel description, default: empty",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--language", default="en-US", help="rss: feed language, default: en-US"
|
||||
)
|
||||
pg.add_argument(
|
||||
"--updatePeriod",
|
||||
default="daily",
|
||||
help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--updateFrequency",
|
||||
default=1,
|
||||
type=int,
|
||||
help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--generator",
|
||||
default="https://gitlab.com/activearchives/etherpump",
|
||||
help="generator, default: https://gitlab.com/activearchives/etherdump",
|
||||
)
|
||||
pg.add_argument(
|
||||
"--timestamp",
|
||||
default=None,
|
||||
help="timestamp, default: now (e.g. 2015-12-01 12:30:00)",
|
||||
)
|
||||
pg.add_argument("--next", default=None, help="next link, default: None)")
|
||||
pg.add_argument("--prev", default=None, help="prev link, default: None")
|
||||
|
||||
@ -130,17 +226,12 @@ def main (args):
|
||||
# Use "base" to strip (longest) extensions
|
||||
# inputs = group(inputs, base)
|
||||
|
||||
def wrappath(p):
    """Describe the local file *p* as a pad "version" entry.

    Returns a dict in which "url" and "path" are both *p* prefixed with
    "./", "code" is 200, and "type" is the file extension without its
    leading dot (an empty string when *p* has no extension).
    """
    path = "./{0}".format(p)
    _, dotted_ext = os.path.splitext(p)
    return {"url": path, "path": path, "code": 200, "type": dotted_ext[1:]}
|
||||
|
||||
def metaforpaths (paths):
|
||||
def metaforpaths(paths):
|
||||
ret = {}
|
||||
pid = base(paths[0])
|
||||
ret['pad'] = ret['padid'] = pid
|
||||
@ -150,7 +241,9 @@ def main (args):
|
||||
mtime = os.stat(p).st_mtime
|
||||
if lastedited == None or mtime > lastedited:
|
||||
lastedited = mtime
|
||||
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime("%Y-%m-%dT%H:%M:%S")
|
||||
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime(
|
||||
"%Y-%m-%dT%H:%M:%S"
|
||||
)
|
||||
ret["lastedited_raw"] = mtime
|
||||
return ret
|
||||
|
||||
@ -170,7 +263,7 @@ def main (args):
|
||||
# else:
|
||||
# return metaforpaths(paths)
|
||||
|
||||
def fixdates (padmeta):
|
||||
def fixdates(padmeta):
|
||||
d = dateutil.parser.parse(padmeta["lastedited_iso"])
|
||||
padmeta["lastedited"] = d
|
||||
padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000")
|
||||
@ -181,17 +274,21 @@ def main (args):
|
||||
pads = list(map(fixdates, pads))
|
||||
args.pads = list(pads)
|
||||
|
||||
def could_have_base(x, y):
    """Tell whether name x is y itself or y followed by a dotted suffix.

    "foo" and "foo.diff.html" both have base "foo"; "foobar.txt" does not,
    because the text after the base must start with ".".
    """
    if x == y:
        return True
    if not x.startswith(y):
        return False
    remainder = x[len(y) :]
    return remainder.startswith(".")
|
||||
|
||||
def get_best_pad(x):
    """Return the pad record whose base name x could belong to, or None.

    Candidates are tried in the order of the enclosing ``padbases`` and the
    first match wins.
    NOTE(review): presumably ``padbases`` is ordered so the most specific
    base is tried first -- confirm where it is built.
    """
    for base_name in padbases:
        # Looked up before the check, exactly as before, so a base missing
        # from pads_by_base still fails at the same point.
        candidate = pads_by_base[base_name]
        if not could_have_base(x, base_name):
            continue
        return candidate
    return None
|
||||
|
||||
def has_version(padinfo, path):
    """List the versions of a pad that already point at the given file.

    A version matches when it has a 'path' entry equal to "./" + path.
    The result is an empty (falsy) list when the file is not yet recorded.
    """
    matches = []
    for version in padinfo['versions']:
        if 'path' not in version:
            continue
        if version['path'] == "./" + path:
            matches.append(version)
    return matches
|
||||
|
||||
if args.files:
|
||||
inputs = args.input
|
||||
@ -209,25 +306,33 @@ def main (args):
|
||||
# print ("PADBASES", file=sys.stderr)
|
||||
# for pb in padbases:
|
||||
# print (" ", pb, file=sys.stderr)
|
||||
print ("pairing input files with pads", file=sys.stderr)
|
||||
print("pairing input files with pads", file=sys.stderr)
|
||||
for x in inputs:
|
||||
# pair input with a pad if possible
|
||||
xbasename = os.path.basename(x)
|
||||
p = get_best_pad(xbasename)
|
||||
if p:
|
||||
if not has_version(p, x):
|
||||
print ("Grouping file {0} with pad {1}".format(x, p['padid']), file=sys.stderr)
|
||||
print(
|
||||
"Grouping file {0} with pad {1}".format(x, p['padid']),
|
||||
file=sys.stderr,
|
||||
)
|
||||
p['versions'].append(wrappath(x))
|
||||
else:
|
||||
print ("Skipping existing version {0} ({1})...".format(x, p['padid']), file=sys.stderr)
|
||||
print(
|
||||
"Skipping existing version {0} ({1})...".format(
|
||||
x, p['padid']
|
||||
),
|
||||
file=sys.stderr,
|
||||
)
|
||||
removelist.append(x)
|
||||
# Removed Matches files
|
||||
for x in removelist:
|
||||
inputs.remove(x)
|
||||
print ("Remaining files:", file=sys.stderr)
|
||||
print("Remaining files:", file=sys.stderr)
|
||||
for x in inputs:
|
||||
print (x, file=sys.stderr)
|
||||
print (file=sys.stderr)
|
||||
print(x, file=sys.stderr)
|
||||
print(file=sys.stderr)
|
||||
# Add "fake" pads for remaining files
|
||||
for x in inputs:
|
||||
args.pads.append(metaforpaths([x]))
|
||||
@ -243,7 +348,9 @@ def main (args):
|
||||
|
||||
# order items & apply limit
|
||||
if args.order == "lastedited":
|
||||
args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=args.reverse)
|
||||
args.pads.sort(
|
||||
key=lambda x: x.get("lastedited_iso"), reverse=args.reverse
|
||||
)
|
||||
elif args.order == "pad":
|
||||
args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse)
|
||||
elif args.order == "padid":
|
||||
@ -251,17 +358,20 @@ def main (args):
|
||||
elif args.order == "revisions":
|
||||
args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse)
|
||||
elif args.order == "authors":
|
||||
args.pads.sort(key=lambda x: len(x.get("authors")), reverse=args.reverse)
|
||||
args.pads.sort(
|
||||
key=lambda x: len(x.get("authors")), reverse=args.reverse
|
||||
)
|
||||
elif args.order == "custom":
|
||||
|
||||
# TODO: make this list non-static, but a variable that can be given from the CLI
|
||||
# TODO: make this list non-static, but a variable that can be given from the CLI
|
||||
|
||||
customorder = [
|
||||
'nooo.relearn.preamble',
|
||||
'nooo.relearn.activating.the.archive',
|
||||
'nooo.relearn.call.for.proposals',
|
||||
'nooo.relearn.call.for.proposals-proposal-footnote',
|
||||
'nooo.relearn.colophon']
|
||||
'nooo.relearn.colophon',
|
||||
]
|
||||
order = []
|
||||
for x in customorder:
|
||||
for pad in args.pads:
|
||||
@ -272,7 +382,7 @@ def main (args):
|
||||
raise Exception("That ordering is not implemented!")
|
||||
|
||||
if args.limit:
|
||||
args.pads = args.pads[:args.limit]
|
||||
args.pads = args.pads[: args.limit]
|
||||
|
||||
# add versions_by_type, add in full text
|
||||
# add link (based on args.link)
|
||||
@ -289,7 +399,7 @@ def main (args):
|
||||
|
||||
if "text" in versions_by_type:
|
||||
# try:
|
||||
with open (versions_by_type["text"]["path"]) as f:
|
||||
with open(versions_by_type["text"]["path"]) as f:
|
||||
content = f.read()
|
||||
# print('content:', content)
|
||||
# [Relearn] Add pandoc command here?
|
||||
@ -297,7 +407,7 @@ def main (args):
|
||||
# print('html:', html)
|
||||
p["text"] = html
|
||||
# except FileNotFoundError:
|
||||
# p['text'] = 'ERROR'
|
||||
# p['text'] = 'ERROR'
|
||||
|
||||
# ADD IN LINK TO PAD AS "link"
|
||||
for v in linkversions:
|
||||
@ -312,6 +422,6 @@ def main (args):
|
||||
|
||||
if args.output:
|
||||
with open(args.output, "w") as f:
|
||||
print (template.render(vars(args)), file=f)
|
||||
print(template.render(vars(args)), file=f)
|
||||
else:
|
||||
print (template.render(vars(args)))
|
||||
print(template.render(vars(args)))
|
||||
|
@ -1,17 +1,19 @@
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
import sys, json, re, os
|
||||
from datetime import datetime
|
||||
from fnmatch import fnmatch
|
||||
from time import sleep
|
||||
from urllib.parse import quote, urlencode
|
||||
from urllib.request import HTTPError, URLError, urlopen
|
||||
from xml.etree import ElementTree as ET
|
||||
|
||||
from urllib.parse import urlencode, quote
|
||||
from urllib.request import urlopen, URLError, HTTPError
|
||||
import html5lib
|
||||
|
||||
from etherpump.commands.common import *
|
||||
from time import sleep
|
||||
from etherpump.commands.html5tidy import html5tidy
|
||||
import html5lib
|
||||
from xml.etree import ElementTree as ET
|
||||
from fnmatch import fnmatch
|
||||
|
||||
# debugging
|
||||
# import ElementTree as ET
|
||||
@ -28,43 +30,144 @@ use/prefer public interfaces ? (export functions)
|
||||
|
||||
"""
|
||||
|
||||
def try_deleting(files):
    """Remove each path in ``files``, ignoring paths that cannot be removed.

    This is a best-effort purge: an OSError from ``os.remove`` (for example
    because the file does not exist) is deliberately swallowed so the
    remaining paths are still attempted.
    """
    for f in files:
        try:
            os.remove(f)
        except OSError:
            # Exception object intentionally not bound; it was never used.
            pass
|
||||
|
||||
def main (args):
|
||||
p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)")
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser(
|
||||
"Check for pads that have changed since last sync (according to .meta.json)"
|
||||
)
|
||||
|
||||
p.add_argument("padid", nargs="*", default=[])
|
||||
p.add_argument("--glob", default=False, help="download pads matching a glob pattern")
|
||||
p.add_argument(
|
||||
"--glob", default=False, help="download pads matching a glob pattern"
|
||||
)
|
||||
|
||||
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherpump/settings.json")
|
||||
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
|
||||
p.add_argument("--pub", default="p", help="folder to store files for public pads, default: p")
|
||||
p.add_argument("--group", default="g", help="folder to store files for group pads, default: g")
|
||||
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
|
||||
p.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False")
|
||||
p.add_argument("--text", default=False, action="store_true", help="download text to PADID.txt, default: False")
|
||||
p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False")
|
||||
p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.diff.html, default: False")
|
||||
p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False")
|
||||
p.add_argument("--folder", default=False, action="store_true", help="dump files in a folder named PADID (meta, text, html, dhtml), default: False")
|
||||
p.add_argument("--output", default=False, action="store_true", help="output changed padids on stdout")
|
||||
p.add_argument("--force", default=False, action="store_true", help="reload, even if revisions count matches previous")
|
||||
p.add_argument("--no-raw-ext", default=False, action="store_true", help="save plain text as padname with no (additional) extension")
|
||||
p.add_argument("--fix-names", default=False, action="store_true", help="normalize padid's (no spaces, special control chars) for use in file names")
|
||||
p.add_argument(
|
||||
"--padinfo",
|
||||
default=".etherpump/settings.json",
|
||||
help="settings, default: .etherpump/settings.json",
|
||||
)
|
||||
p.add_argument(
|
||||
"--zerorevs",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)",
|
||||
)
|
||||
p.add_argument(
|
||||
"--pub",
|
||||
default="p",
|
||||
help="folder to store files for public pads, default: p",
|
||||
)
|
||||
p.add_argument(
|
||||
"--group",
|
||||
default="g",
|
||||
help="folder to store files for group pads, default: g",
|
||||
)
|
||||
p.add_argument(
|
||||
"--skip",
|
||||
default=None,
|
||||
type=int,
|
||||
help="skip this many items, default: None",
|
||||
)
|
||||
p.add_argument(
|
||||
"--meta",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="download meta to PADID.meta.json, default: False",
|
||||
)
|
||||
p.add_argument(
|
||||
"--text",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="download text to PADID.txt, default: False",
|
||||
)
|
||||
p.add_argument(
|
||||
"--html",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="download html to PADID.html, default: False",
|
||||
)
|
||||
p.add_argument(
|
||||
"--dhtml",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="download dhtml to PADID.diff.html, default: False",
|
||||
)
|
||||
p.add_argument(
|
||||
"--all",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="download all files (meta, text, html, dhtml), default: False",
|
||||
)
|
||||
p.add_argument(
|
||||
"--folder",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="dump files in a folder named PADID (meta, text, html, dhtml), default: False",
|
||||
)
|
||||
p.add_argument(
|
||||
"--output",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="output changed padids on stdout",
|
||||
)
|
||||
p.add_argument(
|
||||
"--force",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="reload, even if revisions count matches previous",
|
||||
)
|
||||
p.add_argument(
|
||||
"--no-raw-ext",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="save plain text as padname with no (additional) extension",
|
||||
)
|
||||
p.add_argument(
|
||||
"--fix-names",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="normalize padid's (no spaces, special control chars) for use in file names",
|
||||
)
|
||||
|
||||
p.add_argument("--filter-ext", default=None, help="filter pads by extension")
|
||||
p.add_argument(
|
||||
"--filter-ext", default=None, help="filter pads by extension"
|
||||
)
|
||||
|
||||
p.add_argument("--css", default="/styles.css", help="add css url to output pages, default: /styles.css")
|
||||
p.add_argument("--script", default="/versions.js", help="add script url to output pages, default: /versions.js")
|
||||
p.add_argument(
|
||||
"--css",
|
||||
default="/styles.css",
|
||||
help="add css url to output pages, default: /styles.css",
|
||||
)
|
||||
p.add_argument(
|
||||
"--script",
|
||||
default="/versions.js",
|
||||
help="add script url to output pages, default: /versions.js",
|
||||
)
|
||||
|
||||
p.add_argument("--nopublish", default="__NOPUBLISH__", help="no publish magic word, default: __NOPUBLISH__")
|
||||
p.add_argument("--publish", default="__PUBLISH__", help="the publish magic word, default: __PUBLISH__")
|
||||
p.add_argument("--publish-opt-in", default=False, action="store_true", help="ensure `--publish` is honoured instead of `--nopublish`")
|
||||
p.add_argument(
|
||||
"--nopublish",
|
||||
default="__NOPUBLISH__",
|
||||
help="no publish magic word, default: __NOPUBLISH__",
|
||||
)
|
||||
p.add_argument(
|
||||
"--publish",
|
||||
default="__PUBLISH__",
|
||||
help="the publish magic word, default: __PUBLISH__",
|
||||
)
|
||||
p.add_argument(
|
||||
"--publish-opt-in",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="ensure `--publish` is honoured instead of `--nopublish`",
|
||||
)
|
||||
|
||||
args = p.parse_args(args)
|
||||
|
||||
@ -79,16 +182,20 @@ def main (args):
|
||||
if args.padid:
|
||||
padids = args.padid
|
||||
elif args.glob:
|
||||
padids = getjson(info['localapiurl']+'listAllPads?'+urlencode(data))['data']['padIDs']
|
||||
padids = getjson(
|
||||
info['localapiurl'] + 'listAllPads?' + urlencode(data)
|
||||
)['data']['padIDs']
|
||||
padids = [x for x in padids if fnmatch(x, args.glob)]
|
||||
else:
|
||||
padids = getjson(info['localapiurl']+'listAllPads?'+urlencode(data))['data']['padIDs']
|
||||
padids = getjson(
|
||||
info['localapiurl'] + 'listAllPads?' + urlencode(data)
|
||||
)['data']['padIDs']
|
||||
padids.sort()
|
||||
numpads = len(padids)
|
||||
# maxmsglen = 0
|
||||
count = 0
|
||||
for i, padid in enumerate(padids):
|
||||
if args.skip != None and i<args.skip:
|
||||
if args.skip != None and i < args.skip:
|
||||
continue
|
||||
progressbar(i, numpads, padid)
|
||||
|
||||
@ -110,47 +217,73 @@ def main (args):
|
||||
if os.path.exists(metapath):
|
||||
with open(metapath) as f:
|
||||
meta.update(json.load(f))
|
||||
revisions = getjson(info['localapiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
|
||||
revisions = getjson(
|
||||
info['localapiurl']
|
||||
+ 'getRevisionsCount?'
|
||||
+ urlencode(data)
|
||||
)['data']['revisions']
|
||||
if meta['revisions'] == revisions and not args.force:
|
||||
skip=True
|
||||
skip = True
|
||||
break
|
||||
|
||||
meta['padid'] = padid # .encode("utf-8")
|
||||
meta['padid'] = padid # .encode("utf-8")
|
||||
versions = meta["versions"] = []
|
||||
versions.append({
|
||||
"url": padurlbase + quote(padid),
|
||||
"type": "pad",
|
||||
"code": 200
|
||||
})
|
||||
versions.append(
|
||||
{
|
||||
"url": padurlbase + quote(padid),
|
||||
"type": "pad",
|
||||
"code": 200,
|
||||
}
|
||||
)
|
||||
|
||||
if revisions == None:
|
||||
meta['revisions'] = getjson(info['localapiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
|
||||
meta['revisions'] = getjson(
|
||||
info['localapiurl']
|
||||
+ 'getRevisionsCount?'
|
||||
+ urlencode(data)
|
||||
)['data']['revisions']
|
||||
else:
|
||||
meta['revisions' ] = revisions
|
||||
meta['revisions'] = revisions
|
||||
|
||||
if (meta['revisions'] == 0) and (not args.zerorevs):
|
||||
# print("Skipping zero revs", file=sys.stderr)
|
||||
skip=True
|
||||
skip = True
|
||||
break
|
||||
|
||||
# todo: load more metadata!
|
||||
meta['group'], meta['pad'] = splitpadname(padid)
|
||||
meta['pathbase'] = p
|
||||
meta['lastedited_raw'] = int(getjson(info['localapiurl']+'getLastEdited?'+urlencode(data))['data']['lastEdited'])
|
||||
meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat()
|
||||
meta['author_ids'] = getjson(info['localapiurl']+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
|
||||
meta['lastedited_raw'] = int(
|
||||
getjson(
|
||||
info['localapiurl'] + 'getLastEdited?' + urlencode(data)
|
||||
)['data']['lastEdited']
|
||||
)
|
||||
meta['lastedited_iso'] = datetime.fromtimestamp(
|
||||
int(meta['lastedited_raw']) / 1000
|
||||
).isoformat()
|
||||
meta['author_ids'] = getjson(
|
||||
info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
|
||||
)['data']['authorIDs']
|
||||
break
|
||||
except HTTPError as e:
|
||||
tries += 1
|
||||
if tries > 3:
|
||||
print ("Too many failures ({0}), skipping".format(padid), file=sys.stderr)
|
||||
skip=True
|
||||
print(
|
||||
"Too many failures ({0}), skipping".format(padid),
|
||||
file=sys.stderr,
|
||||
)
|
||||
skip = True
|
||||
break
|
||||
else:
|
||||
sleep(3)
|
||||
except TypeError as e:
|
||||
print ("Type Error loading pad {0} (phantom pad?), skipping".format(padid), file=sys.stderr)
|
||||
skip=True
|
||||
print(
|
||||
"Type Error loading pad {0} (phantom pad?), skipping".format(
|
||||
padid
|
||||
),
|
||||
file=sys.stderr,
|
||||
)
|
||||
skip = True
|
||||
break
|
||||
|
||||
if skip:
|
||||
@ -159,7 +292,7 @@ def main (args):
|
||||
count += 1
|
||||
|
||||
if args.output:
|
||||
print (padid)
|
||||
print(padid)
|
||||
|
||||
if args.all or (args.meta or args.text or args.html or args.dhtml):
|
||||
try:
|
||||
@ -168,7 +301,7 @@ def main (args):
|
||||
pass
|
||||
|
||||
if args.all or args.text:
|
||||
text = getjson(info['localapiurl']+'getText?'+urlencode(data))
|
||||
text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))
|
||||
ver = {"type": "text"}
|
||||
versions.append(ver)
|
||||
ver["code"] = text["_code"]
|
||||
@ -180,17 +313,31 @@ def main (args):
|
||||
##########################################
|
||||
if args.nopublish and args.nopublish in text:
|
||||
# NEED TO PURGE ANY EXISTING DOCS
|
||||
try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json"))
|
||||
try_deleting(
|
||||
(
|
||||
p + raw_ext,
|
||||
p + ".raw.html",
|
||||
p + ".diff.html",
|
||||
p + ".meta.json",
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
##########################################
|
||||
## ENFORCE __PUBLISH__ MAGIC WORD
|
||||
##########################################
|
||||
if args.publish_opt_in and args.publish not in text:
|
||||
try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json"))
|
||||
try_deleting(
|
||||
(
|
||||
p + raw_ext,
|
||||
p + ".raw.html",
|
||||
p + ".diff.html",
|
||||
p + ".meta.json",
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
ver["path"] = p+raw_ext
|
||||
ver["path"] = p + raw_ext
|
||||
ver["url"] = quote(ver["path"])
|
||||
with open(ver["path"], "w") as f:
|
||||
f.write(text)
|
||||
@ -199,38 +346,86 @@ def main (args):
|
||||
|
||||
links = []
|
||||
if args.css:
|
||||
links.append({"href":args.css, "rel":"stylesheet"})
|
||||
links.append({"href": args.css, "rel": "stylesheet"})
|
||||
# todo, make this process reflect which files actually were made
|
||||
versionbaseurl = quote(padid)
|
||||
links.append({"href":versions[0]["url"], "rel":"alternate", "type":"text/html", "title":"Etherpad"})
|
||||
links.append(
|
||||
{
|
||||
"href": versions[0]["url"],
|
||||
"rel": "alternate",
|
||||
"type": "text/html",
|
||||
"title": "Etherpad",
|
||||
}
|
||||
)
|
||||
if args.all or args.text:
|
||||
links.append({"href":versionbaseurl+raw_ext, "rel":"alternate", "type":"text/plain", "title":"Plain text"})
|
||||
links.append(
|
||||
{
|
||||
"href": versionbaseurl + raw_ext,
|
||||
"rel": "alternate",
|
||||
"type": "text/plain",
|
||||
"title": "Plain text",
|
||||
}
|
||||
)
|
||||
if args.all or args.html:
|
||||
links.append({"href":versionbaseurl+".raw.html", "rel":"alternate", "type":"text/html", "title":"HTML"})
|
||||
links.append(
|
||||
{
|
||||
"href": versionbaseurl + ".raw.html",
|
||||
"rel": "alternate",
|
||||
"type": "text/html",
|
||||
"title": "HTML",
|
||||
}
|
||||
)
|
||||
if args.all or args.dhtml:
|
||||
links.append({"href":versionbaseurl+".diff.html", "rel":"alternate", "type":"text/html", "title":"HTML with author colors"})
|
||||
links.append(
|
||||
{
|
||||
"href": versionbaseurl + ".diff.html",
|
||||
"rel": "alternate",
|
||||
"type": "text/html",
|
||||
"title": "HTML with author colors",
|
||||
}
|
||||
)
|
||||
if args.all or args.meta:
|
||||
links.append({"href":versionbaseurl+".meta.json", "rel":"alternate", "type":"application/json", "title":"Meta data"})
|
||||
links.append(
|
||||
{
|
||||
"href": versionbaseurl + ".meta.json",
|
||||
"rel": "alternate",
|
||||
"type": "application/json",
|
||||
"title": "Meta data",
|
||||
}
|
||||
)
|
||||
|
||||
# links.append({"href":"/", "rel":"search", "type":"text/html", "title":"Index"})
|
||||
|
||||
if args.all or args.dhtml:
|
||||
data['startRev'] = "0"
|
||||
html = getjson(info['localapiurl']+'createDiffHTML?'+urlencode(data))
|
||||
html = getjson(
|
||||
info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
|
||||
)
|
||||
ver = {"type": "diffhtml"}
|
||||
versions.append(ver)
|
||||
ver["code"] = html["_code"]
|
||||
if html["_code"] == 200:
|
||||
try:
|
||||
html = html['data']['html']
|
||||
ver["path"] = p+".diff.html"
|
||||
ver["path"] = p + ".diff.html"
|
||||
ver["url"] = quote(ver["path"])
|
||||
# doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
|
||||
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
|
||||
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
|
||||
doc = html5lib.parse(
|
||||
html, treebuilder="etree", namespaceHTMLElements=False
|
||||
)
|
||||
html5tidy(
|
||||
doc,
|
||||
indent=True,
|
||||
title=padid,
|
||||
scripts=args.script,
|
||||
links=links,
|
||||
)
|
||||
with open(ver["path"], "w") as f:
|
||||
# f.write(html.encode("utf-8"))
|
||||
print(ET.tostring(doc, method="html", encoding="unicode"), file=f)
|
||||
print(
|
||||
ET.tostring(doc, method="html", encoding="unicode"),
|
||||
file=f,
|
||||
)
|
||||
except TypeError:
|
||||
# Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file!
|
||||
ver["message"] = html["message"]
|
||||
@ -239,19 +434,30 @@ def main (args):
|
||||
|
||||
# Process text, html, dhtml, all options
|
||||
if args.all or args.html:
|
||||
html = getjson(info['localapiurl']+'getHTML?'+urlencode(data))
|
||||
html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))
|
||||
ver = {"type": "html"}
|
||||
versions.append(ver)
|
||||
ver["code"] = html["_code"]
|
||||
if html["_code"] == 200:
|
||||
html = html['data']['html']
|
||||
ver["path"] = p+".raw.html"
|
||||
ver["path"] = p + ".raw.html"
|
||||
ver["url"] = quote(ver["path"])
|
||||
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
|
||||
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
|
||||
doc = html5lib.parse(
|
||||
html, treebuilder="etree", namespaceHTMLElements=False
|
||||
)
|
||||
html5tidy(
|
||||
doc,
|
||||
indent=True,
|
||||
title=padid,
|
||||
scripts=args.script,
|
||||
links=links,
|
||||
)
|
||||
with open(ver["path"], "w") as f:
|
||||
# f.write(html.encode("utf-8"))
|
||||
print (ET.tostring(doc, method="html", encoding="unicode"), file=f)
|
||||
print(
|
||||
ET.tostring(doc, method="html", encoding="unicode"),
|
||||
file=f,
|
||||
)
|
||||
|
||||
# output meta
|
||||
if args.all or args.meta:
|
||||
|
@ -1,14 +1,18 @@
|
||||
|
||||
from argparse import ArgumentParser
|
||||
import json
|
||||
from argparse import ArgumentParser
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import urlopen
|
||||
from urllib.error import HTTPError, URLError
|
||||
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("call getRevisionsCount for the given padid")
|
||||
p.add_argument("padid", help="the padid")
|
||||
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
|
||||
p.add_argument(
|
||||
"--padinfo",
|
||||
default=".etherpump/settings.json",
|
||||
help="settings, default: .etherdump/settings.json",
|
||||
)
|
||||
p.add_argument("--showurl", default=False, action="store_true")
|
||||
args = p.parse_args(args)
|
||||
|
||||
@ -18,9 +22,9 @@ def main(args):
|
||||
data = {}
|
||||
data['apikey'] = info['apikey']
|
||||
data['padID'] = args.padid.encode("utf-8")
|
||||
requesturl = apiurl+'getRevisionsCount?'+urlencode(data)
|
||||
requesturl = apiurl + 'getRevisionsCount?' + urlencode(data)
|
||||
if args.showurl:
|
||||
print (requesturl)
|
||||
print(requesturl)
|
||||
else:
|
||||
results = json.load(urlopen(requesturl))['data']['revisions']
|
||||
print (results)
|
||||
print(results)
|
||||
|
@ -1,39 +1,60 @@
|
||||
|
||||
import json
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
import json, sys
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import urlopen
|
||||
from urllib.error import HTTPError, URLError
|
||||
|
||||
import requests
|
||||
|
||||
LIMIT_BYTES = 100 * 1000
|
||||
|
||||
LIMIT_BYTES = 100*1000
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("calls the setHTML API function for the given padid")
|
||||
p.add_argument("padid", help="the padid")
|
||||
p.add_argument("--html", default=None, help="html, default: read from stdin")
|
||||
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
|
||||
p.add_argument(
|
||||
"--html", default=None, help="html, default: read from stdin"
|
||||
)
|
||||
p.add_argument(
|
||||
"--padinfo",
|
||||
default=".etherpump/settings.json",
|
||||
help="settings, default: .etherdump/settings.json",
|
||||
)
|
||||
p.add_argument("--showurl", default=False, action="store_true")
|
||||
# p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
|
||||
p.add_argument("--create", default=False, action="store_true", help="flag to create pad if necessary")
|
||||
p.add_argument("--limit", default=False, action="store_true", help="limit text to 100k (etherpad limit)")
|
||||
p.add_argument(
|
||||
"--create",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="flag to create pad if necessary",
|
||||
)
|
||||
p.add_argument(
|
||||
"--limit",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="limit text to 100k (etherpad limit)",
|
||||
)
|
||||
args = p.parse_args(args)
|
||||
|
||||
with open(args.padinfo) as f:
|
||||
info = json.load(f)
|
||||
apiurl = info.get("apiurl")
|
||||
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
|
||||
# data = {}
|
||||
# data['apikey'] = info['apikey']
|
||||
# data['padID'] = args.padid # is utf-8 encoded
|
||||
# data = {}
|
||||
# data['apikey'] = info['apikey']
|
||||
# data['padID'] = args.padid # is utf-8 encoded
|
||||
|
||||
createPad = False
|
||||
if args.create:
|
||||
# check if it's in fact necessary
|
||||
requesturl = apiurl+'getRevisionsCount?'+urlencode({'apikey': info['apikey'], 'padID': args.padid})
|
||||
requesturl = (
|
||||
apiurl
|
||||
+ 'getRevisionsCount?'
|
||||
+ urlencode({'apikey': info['apikey'], 'padID': args.padid})
|
||||
)
|
||||
results = json.load(urlopen(requesturl))
|
||||
print (json.dumps(results, indent=2), file=sys.stderr)
|
||||
print(json.dumps(results, indent=2), file=sys.stderr)
|
||||
if results['code'] != 0:
|
||||
createPad = True
|
||||
|
||||
@ -47,21 +68,27 @@ def main(args):
|
||||
params['padID'] = args.padid
|
||||
|
||||
if createPad:
|
||||
requesturl = apiurl+'createPad'
|
||||
requesturl = apiurl + 'createPad'
|
||||
if args.showurl:
|
||||
print (requesturl)
|
||||
results = requests.post(requesturl, params=params, data={'text': ''}) # json.load(urlopen(requesturl))
|
||||
print(requesturl)
|
||||
results = requests.post(
|
||||
requesturl, params=params, data={'text': ''}
|
||||
) # json.load(urlopen(requesturl))
|
||||
results = json.loads(results.text)
|
||||
print (json.dumps(results, indent=2))
|
||||
print(json.dumps(results, indent=2))
|
||||
|
||||
if len(html) > LIMIT_BYTES and args.limit:
|
||||
print ("limiting", len(text), LIMIT_BYTES, file=sys.stderr)
|
||||
print("limiting", len(text), LIMIT_BYTES, file=sys.stderr)
|
||||
html = html[:LIMIT_BYTES]
|
||||
|
||||
requesturl = apiurl+'setHTML'
|
||||
requesturl = apiurl + 'setHTML'
|
||||
if args.showurl:
|
||||
print (requesturl)
|
||||
print(requesturl)
|
||||
# params['html'] = html
|
||||
results = requests.post(requesturl, params={'apikey': info['apikey']}, data={'apikey': info['apikey'], 'padID': args.padid, 'html': html}) # json.load(urlopen(requesturl))
|
||||
results = requests.post(
|
||||
requesturl,
|
||||
params={'apikey': info['apikey']},
|
||||
data={'apikey': info['apikey'], 'padID': args.padid, 'html': html},
|
||||
) # json.load(urlopen(requesturl))
|
||||
results = json.loads(results.text)
|
||||
print (json.dumps(results, indent=2))
|
||||
print(json.dumps(results, indent=2))
|
||||
|
@ -1,24 +1,39 @@
|
||||
|
||||
import json
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
import json, sys
|
||||
|
||||
from urllib.parse import urlencode, quote
|
||||
from urllib.request import urlopen, URLError, HTTPError
|
||||
from urllib.parse import quote, urlencode
|
||||
from urllib.request import HTTPError, URLError, urlopen
|
||||
|
||||
import requests
|
||||
|
||||
LIMIT_BYTES = 100 * 1000
|
||||
|
||||
LIMIT_BYTES = 100*1000
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser("calls the getText API function for the given padid")
|
||||
p.add_argument("padid", help="the padid")
|
||||
p.add_argument("--text", default=None, help="text, default: read from stdin")
|
||||
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
|
||||
p.add_argument(
|
||||
"--text", default=None, help="text, default: read from stdin"
|
||||
)
|
||||
p.add_argument(
|
||||
"--padinfo",
|
||||
default=".etherpump/settings.json",
|
||||
help="settings, default: .etherdump/settings.json",
|
||||
)
|
||||
p.add_argument("--showurl", default=False, action="store_true")
|
||||
# p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
|
||||
p.add_argument("--create", default=False, action="store_true", help="flag to create pad if necessary")
|
||||
p.add_argument("--limit", default=False, action="store_true", help="limit text to 100k (etherpad limit)")
|
||||
p.add_argument(
|
||||
"--create",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="flag to create pad if necessary",
|
||||
)
|
||||
p.add_argument(
|
||||
"--limit",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="limit text to 100k (etherpad limit)",
|
||||
)
|
||||
args = p.parse_args(args)
|
||||
|
||||
with open(args.padinfo) as f:
|
||||
@ -27,11 +42,11 @@ def main(args):
|
||||
# apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
|
||||
data = {}
|
||||
data['apikey'] = info['apikey']
|
||||
data['padID'] = args.padid # is utf-8 encoded
|
||||
data['padID'] = args.padid # is utf-8 encoded
|
||||
|
||||
createPad = False
|
||||
if args.create:
|
||||
requesturl = apiurl+'getRevisionsCount?'+urlencode(data)
|
||||
requesturl = apiurl + 'getRevisionsCount?' + urlencode(data)
|
||||
results = json.load(urlopen(requesturl))
|
||||
# print (json.dumps(results, indent=2))
|
||||
if results['code'] != 0:
|
||||
@ -43,20 +58,26 @@ def main(args):
|
||||
text = sys.stdin.read()
|
||||
|
||||
if len(text) > LIMIT_BYTES and args.limit:
|
||||
print ("limiting", len(text), LIMIT_BYTES)
|
||||
print("limiting", len(text), LIMIT_BYTES)
|
||||
text = text[:LIMIT_BYTES]
|
||||
|
||||
data['text'] = text
|
||||
|
||||
if createPad:
|
||||
requesturl = apiurl+'createPad'
|
||||
requesturl = apiurl + 'createPad'
|
||||
else:
|
||||
requesturl = apiurl+'setText'
|
||||
requesturl = apiurl + 'setText'
|
||||
|
||||
if args.showurl:
|
||||
print (requesturl)
|
||||
results = requests.post(requesturl, params=data) # json.load(urlopen(requesturl))
|
||||
print(requesturl)
|
||||
results = requests.post(
|
||||
requesturl, params=data
|
||||
) # json.load(urlopen(requesturl))
|
||||
results = json.loads(results.text)
|
||||
if results['code'] != 0:
|
||||
print ("setText: ERROR ({0}) on pad {1}: {2}".format(results['code'], args.padid, results['message']))
|
||||
print(
|
||||
"setText: ERROR ({0}) on pad {1}: {2}".format(
|
||||
results['code'], args.padid, results['message']
|
||||
)
|
||||
)
|
||||
# json.dumps(results, indent=2)
|
||||
|
@ -1,17 +1,25 @@
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
import json, sys, re
|
||||
|
||||
from .common import *
|
||||
|
||||
|
||||
"""
|
||||
Extract and output selected fields of metadata
|
||||
"""
|
||||
|
||||
def main (args):
|
||||
p = ArgumentParser("extract & display meta data from a specific .meta.json file, or for a given padid (nb: it still looks for a .meta.json file)")
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser(
|
||||
"extract & display meta data from a specific .meta.json file, or for a given padid (nb: it still looks for a .meta.json file)"
|
||||
)
|
||||
p.add_argument("--path", default=None, help="read from a meta.json file")
|
||||
p.add_argument("--padid", default=None, help="read meta for this padid")
|
||||
p.add_argument("--format", default="{padid}", help="format str, default: {padid}")
|
||||
p.add_argument(
|
||||
"--format", default="{padid}", help="format str, default: {padid}"
|
||||
)
|
||||
args = p.parse_args(args)
|
||||
|
||||
path = args.path
|
||||
@ -19,7 +27,7 @@ def main (args):
|
||||
path = padpath(args.padid) + ".meta.json"
|
||||
|
||||
if not path:
|
||||
print ("Must specify either --path or --padid")
|
||||
print("Must specify either --path or --padid")
|
||||
sys.exit(-1)
|
||||
|
||||
with open(path) as f:
|
||||
@ -27,5 +35,4 @@ def main (args):
|
||||
|
||||
formatstr = args.format.decode("utf-8")
|
||||
formatstr = re.sub(r"{(\w+)}", r"{0[\1]}", formatstr)
|
||||
print (formatstr.format(meta).encode("utf-8"))
|
||||
|
||||
print(formatstr.format(meta).encode("utf-8"))
|
||||
|
@ -1,13 +1,17 @@
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
import sys, json, re, os
|
||||
from datetime import datetime
|
||||
from math import ceil, floor
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import urlopen
|
||||
from urllib.error import HTTPError, URLError
|
||||
from math import ceil, floor
|
||||
|
||||
from .common import *
|
||||
|
||||
|
||||
"""
|
||||
status (meta):
|
||||
Update meta data files for those that have changed.
|
||||
@ -22,16 +26,18 @@ complicates the "syncing" idea....
|
||||
|
||||
"""
|
||||
|
||||
class PadItemException(Exception):
    """Raised by PadItem when it is constructed with invalid arguments.

    PadItem raises this when both ``padid`` and ``path`` are given, or when
    neither of them is.
    """
|
||||
|
||||
class PadItem ():
|
||||
def __init__ (self, padid=None, path=None, padexists=False):
|
||||
|
||||
class PadItem:
|
||||
def __init__(self, padid=None, path=None, padexists=False):
|
||||
self.padexists = padexists
|
||||
if padid and path:
|
||||
raise PadItemException("only give padid or path")
|
||||
if not (padid or path):
|
||||
raise PadItemException("either padid or path must be specified")
|
||||
raise PadItemException("either padid or path must be specified")
|
||||
if padid:
|
||||
self.padid = padid
|
||||
self.path = padpath(padid, group_path="g")
|
||||
@ -40,7 +46,7 @@ class PadItem ():
|
||||
self.padid = padpath2id(path)
|
||||
|
||||
@property
|
||||
def status (self):
|
||||
def status(self):
|
||||
if self.fileexists:
|
||||
if self.padexists:
|
||||
return "S"
|
||||
@ -52,26 +58,77 @@ class PadItem ():
|
||||
return "?"
|
||||
|
||||
@property
def fileexists(self):
    """Whether anything exists on the filesystem at ``self.path``."""
    return os.path.exists(self.path)
|
||||
|
||||
def ignore_p(path, settings=None):
    """Return True when *path* should be ignored, False otherwise.

    A path is ignored when it starts with a dot. ``settings`` is accepted to
    keep the call signature stable but is not consulted here.

    Always returns a real ``bool`` (the earlier form fell through and
    returned ``None`` for non-ignored paths).
    """
    return path.startswith(".")
|
||||
|
||||
def main (args):
|
||||
p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)")
|
||||
|
||||
def main(args):
|
||||
p = ArgumentParser(
|
||||
"Check for pads that have changed since last sync (according to .meta.json)"
|
||||
)
|
||||
# p.add_argument("padid", nargs="*", default=[])
|
||||
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
|
||||
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
|
||||
p.add_argument("--pub", default=".", help="folder to store files for public pads, default: pub")
|
||||
p.add_argument("--group", default="g", help="folder to store files for group pads, default: g")
|
||||
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
|
||||
p.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False")
|
||||
p.add_argument("--text", default=False, action="store_true", help="download text to PADID.txt, default: False")
|
||||
p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False")
|
||||
p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.dhtml, default: False")
|
||||
p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False")
|
||||
p.add_argument(
|
||||
"--padinfo",
|
||||
default=".etherpump/settings.json",
|
||||
help="settings, default: .etherdump/settings.json",
|
||||
)
|
||||
p.add_argument(
|
||||
"--zerorevs",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)",
|
||||
)
|
||||
p.add_argument(
|
||||
"--pub",
|
||||
default=".",
|
||||
help="folder to store files for public pads, default: pub",
|
||||
)
|
||||
p.add_argument(
|
||||
"--group",
|
||||
default="g",
|
||||
help="folder to store files for group pads, default: g",
|
||||
)
|
||||
p.add_argument(
|
||||
"--skip",
|
||||
default=None,
|
||||
type=int,
|
||||
help="skip this many items, default: None",
|
||||
)
|
||||
p.add_argument(
|
||||
"--meta",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="download meta to PADID.meta.json, default: False",
|
||||
)
|
||||
p.add_argument(
|
||||
"--text",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="download text to PADID.txt, default: False",
|
||||
)
|
||||
p.add_argument(
|
||||
"--html",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="download html to PADID.html, default: False",
|
||||
)
|
||||
p.add_argument(
|
||||
"--dhtml",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="download dhtml to PADID.dhtml, default: False",
|
||||
)
|
||||
p.add_argument(
|
||||
"--all",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="download all files (meta, text, html, dhtml), default: False",
|
||||
)
|
||||
args = p.parse_args(args)
|
||||
|
||||
info = loadpadinfo(args.padinfo)
|
||||
@ -81,7 +138,9 @@ def main (args):
|
||||
padsbypath = {}
|
||||
|
||||
# listAllPads
|
||||
padids = getjson(info['apiurl']+'listAllPads?'+urlencode(data))['data']['padIDs']
|
||||
padids = getjson(info['apiurl'] + 'listAllPads?' + urlencode(data))['data'][
|
||||
'padIDs'
|
||||
]
|
||||
padids.sort()
|
||||
for padid in padids:
|
||||
pad = PadItem(padid=padid, padexists=True)
|
||||
@ -104,9 +163,9 @@ def main (args):
|
||||
if p.status != curstat:
|
||||
curstat = p.status
|
||||
if curstat == "F":
|
||||
print ("New/changed files")
|
||||
print("New/changed files")
|
||||
elif curstat == "P":
|
||||
print ("New/changed pads")
|
||||
print("New/changed pads")
|
||||
elif curstat == ".":
|
||||
print ("Up to date")
|
||||
print (" ", p.status, p.padid)
|
||||
print("Up to date")
|
||||
print(" ", p.status, p.padid)
|
||||
|
12
pyproject.toml
Normal file
12
pyproject.toml
Normal file
@ -0,0 +1,12 @@
|
||||
# Packages needed to build the project, and the backend that performs the build.
[build-system]
requires = [
    "setuptools>=41.0.0",
    "setuptools-scm",
    "wheel",
]
build-backend = "setuptools.build_meta"

# Settings for the black code formatter.
[tool.black]
line-length = 80
target-version = ['py35', 'py36', 'py37']
# Leave existing quote characters as they are instead of rewriting them.
skip-string-normalization = true
|
9
setup.cfg
Normal file
9
setup.cfg
Normal file
@ -0,0 +1,9 @@
|
||||
# Settings for the flake8 linter.
[flake8]
max-line-length = 80

# Settings for the isort import sorter.
[isort]
known_first_party = etherpump
line_length = 80
# Mode 3 wraps long imports one name per line inside parentheses.
multi_line_output = 3
include_trailing_comma = True
skip = .venv
|
Loading…
Reference in New Issue
Block a user