Add maintenance tools and run them

This commit is contained in:
Luke Murphy 2019-09-27 23:14:30 +02:00
parent 159165d2d5
commit 8f18594833
No known key found for this signature in database
GPG Key ID: 5E2EF5A63E3718CC
27 changed files with 1253 additions and 465 deletions

View File

@ -1,4 +1,13 @@
# Directories handed to the formatting and linting tools.
SOURCE_DIRS := bin/ etherpump/

# None of these targets produces a file of the same name. Declaring them phony
# keeps them runnable even if a file called "publish", "format" or "lint"
# ever appears next to this Makefile.
.PHONY: publish format lint

# Remove any previous build output, build a wheel and upload it with twine.
publish:
	@rm -rf dist
	@python setup.py bdist_wheel
	@twine upload dist/*

# Run black, then isort, over the source directories.
format:
	@black $(SOURCE_DIRS)
	@isort -rc $(SOURCE_DIRS)

# Run flake8 over the source directories.
lint:
	@flake8 $(SOURCE_DIRS)

View File

@ -126,6 +126,23 @@ Publishing
You should have a [PyPI](https://pypi.org/) account and be added as an owner/maintainer on the [etherpump package](https://pypi.org/project/etherpump/).
Maintenance utilities
---------------------
Tools to help things stay tidy over time.
```bash
$ pip install flake8 isort black
$ make format
$ make lint
```
Please see the following links for further reading:
* http://flake8.pycqa.org
* https://isort.readthedocs.io
* https://black.readthedocs.io
License
=======

View File

@ -1,8 +1,9 @@
#!/usr/bin/env python3
from etherpump import VERSION
import sys
from etherpump import VERSION
usage = """Usage:
etherpump CMD
@ -43,7 +44,9 @@ except IndexError:
sys.exit(0)
try:
# http://stackoverflow.com/questions/301134/dynamic-module-import-in-python
cmdmod = __import__("etherpump.commands.%s" % cmd, fromlist=["etherdump.commands"])
cmdmod = __import__(
"etherpump.commands.%s" % cmd, fromlist=["etherdump.commands"]
)
cmdmod.main(args)
except ImportError as e:
print("Error performing command '{0}'\n(python said: {1})\n".format(cmd, e))

View File

@ -1,8 +1,10 @@
#!/usr/bin/env python
import json
import os
from argparse import ArgumentParser
import json, os
def main(args):
p = ArgumentParser("")

View File

@ -1,13 +1,22 @@
import re, os, json, sys
import json
import os
import re
import sys
from html.entities import name2codepoint
from math import ceil, floor
from time import sleep
from urllib.parse import urlparse, urlunparse, urlencode, quote_plus, unquote_plus
from urllib.request import urlopen, URLError, HTTPError
from html.entities import name2codepoint
from urllib.parse import (
quote_plus,
unquote_plus,
urlencode,
urlparse,
urlunparse,
)
from urllib.request import HTTPError, URLError, urlopen
groupnamepat = re.compile(r"^g\.(\w+)\$")
def splitpadname(padid):
m = groupnamepat.match(padid)
if m:
@ -15,9 +24,11 @@ def splitpadname (padid):
else:
return ("", padid)
def padurl(padid,):
return padid
def padpath(padid, pub_path="", group_path="", normalize=False):
g, p = splitpadname(padid)
p = quote_plus(p)
@ -32,6 +43,7 @@ def padpath (padid, pub_path="", group_path="", normalize=False):
else:
return os.path.join(pub_path, p)
def padpath2id(path):
if type(path) == str:
path = path.encode("utf-8")
@ -43,6 +55,7 @@ def padpath2id (path):
else:
return p.decode("utf-8")
def getjson(url, max_retry=3, retry_sleep_time=3):
ret = {}
ret["_retries"] = 0
@ -68,6 +81,7 @@ def getjson (url, max_retry=3, retry_sleep_time=3):
sleep(retry_sleep_time)
return ret
def loadpadinfo(p):
with open(p) as f:
info = json.load(f)
@ -75,6 +89,7 @@ def loadpadinfo(p):
info['localapiurl'] = info.get('apiurl')
return info
def progressbar(i, num, label="", file=sys.stderr):
p = float(i) / num
percentage = int(floor(p * 100))
@ -85,7 +100,6 @@ def progressbar (i, num, label="", file=sys.stderr):
sys.stderr.flush()
# Python developer Fredrik Lundh (author of elementtree, among other things) has such a function on his website, which works with decimal, hex and named entities:
##
# Removes HTML or XML character references and entities from a text string.
@ -111,4 +125,5 @@ def unescape(text):
except KeyError:
pass
return text # leave as is
return re.sub("&#?\w+;", fixup, text)

View File

@ -1,18 +1,29 @@
from argparse import ArgumentParser
import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args):
p = ArgumentParser("calls the createDiffHTML API function for the given padid")
p = ArgumentParser(
"calls the createDiffHTML API function for the given padid"
)
p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
p.add_argument(
"--format",
default="text",
help="output format, can be: text, json; default: text",
)
p.add_argument(
"--rev", type=int, default=None, help="revision, default: latest"
)
args = p.parse_args(args)
with open(args.padinfo) as f:

View File

@ -1,17 +1,24 @@
from argparse import ArgumentParser
import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args):
p = ArgumentParser("calls the getText API function for the given padid")
p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument(
"--format",
default="text",
help="output format, can be: text, json; default: text",
)
args = p.parse_args(args)
with open(args.padinfo) as f:

View File

@ -1,12 +1,13 @@
import json
import re
import sys
from argparse import ArgumentParser
import sys, json, re
from csv import writer
from datetime import datetime
from math import ceil, floor
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
from csv import writer
from math import ceil, floor
"""
Dumps a CSV of all pads with columns
@ -23,16 +24,27 @@ groupnamepat = re.compile(r"^g\.(\w+)\$")
out = writer(sys.stdout)
def jsonload(url):
    """Fetch ``url`` and return its body parsed as JSON.

    The response is opened in a ``with`` block so it is closed even when
    reading the body fails part-way.
    """
    with urlopen(url) as response:
        payload = response.read()
    return json.loads(payload)
def main(args):
p = ArgumentParser("outputs a CSV of information all all pads")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument(
"--zerorevs",
default=False,
action="store_true",
help="include pads with zero revisions, default: False",
)
args = p.parse_args(args)
with open(args.padinfo) as f:
@ -49,7 +61,7 @@ def main (args):
count = 0
out.writerow(("padid", "groupid", "lastedited", "revisions", "author_ids"))
for i, padid in enumerate(padids):
p = (float(i) / numpads)
p = float(i) / numpads
percentage = int(floor(p * 100))
bars = int(ceil(p * 20))
bar = ("*" * bars) + ("-" * (20 - bars))
@ -68,17 +80,31 @@ def main (args):
padidnogroup = padid
data['padID'] = padid.encode("utf-8")
revisions = jsonload(apiurl+'getRevisionsCount?'+urlencode(data))['data']['revisions']
revisions = jsonload(apiurl + 'getRevisionsCount?' + urlencode(data))[
'data'
]['revisions']
if (revisions == 0) and not args.zerorevs:
continue
lastedited_raw = jsonload(apiurl+'getLastEdited?'+urlencode(data))['data']['lastEdited']
lastedited_iso = datetime.fromtimestamp(int(lastedited_raw)/1000).isoformat()
author_ids = jsonload(apiurl+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
lastedited_raw = jsonload(apiurl + 'getLastEdited?' + urlencode(data))[
'data'
]['lastEdited']
lastedited_iso = datetime.fromtimestamp(
int(lastedited_raw) / 1000
).isoformat()
author_ids = jsonload(apiurl + 'listAuthorsOfPad?' + urlencode(data))[
'data'
]['authorIDs']
author_ids = " ".join(author_ids).encode("utf-8")
out.writerow((padidnogroup.encode("utf-8"), groupname.encode("utf-8"), revisions, lastedited_iso, author_ids))
out.writerow(
(
padidnogroup.encode("utf-8"),
groupname.encode("utf-8"),
revisions,
lastedited_iso,
author_ids,
)
)
count += 1
print("\nWrote {0} rows...".format(count), file=sys.stderr)

View File

@ -1,18 +1,27 @@
from argparse import ArgumentParser
import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args):
p = ArgumentParser("calls the getHTML API function for the given padid")
p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
p.add_argument(
"--format",
default="text",
help="output format, can be: text, json; default: text",
)
p.add_argument(
"--rev", type=int, default=None, help="revision, default: latest"
)
args = p.parse_args(args)
with open(args.padinfo) as f:

View File

@ -1,17 +1,27 @@
import json
import sys
from argparse import ArgumentParser
import json, sys
from urllib.parse import urlencode
from urllib.request import urlopen, URLError, HTTPError
from urllib.request import HTTPError, URLError, urlopen
def main(args):
p = ArgumentParser("calls the getText API function for the given padid")
p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
p.add_argument(
"--format",
default="text",
help="output format, can be: text, json; default: text",
)
p.add_argument(
"--rev", type=int, default=None, help="revision, default: latest"
)
args = p.parse_args(args)
with open(args.padinfo) as f:

View File

@ -1,11 +1,13 @@
#!/usr/bin/env python3
from html5lib import parse
import os, sys
import os
import sys
from argparse import ArgumentParser
from xml.etree import ElementTree as ET
from html5lib import parse
def etree_indent(elem, level=0):
i = "\n" + level * " "
@ -22,6 +24,7 @@ def etree_indent(elem, level=0):
if level and (not elem.tail or not elem.tail.strip()):
elem.tail = i
def get_link_type(url):
lurl = url.lower()
if lurl.endswith(".html") or lurl.endswith(".htm"):
@ -37,13 +40,17 @@ def get_link_type (url):
elif lurl.endswith(".js") or lurl.endswith(".jsonp"):
return "text/javascript"
def pluralize(x):
    """Return ``x`` unchanged if it is a list or tuple, else wrap it in a 1-tuple.

    ``isinstance`` is used instead of comparing ``type(x)`` exactly, so
    subclasses of list/tuple (for example a namedtuple) are also passed
    through rather than being wrapped a second time.
    """
    if isinstance(x, (list, tuple)):
        return x
    return (x,)
def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, indent=False):
def html5tidy(
doc, charset="utf-8", title=None, scripts=None, links=None, indent=False
):
if scripts:
script_srcs = [x.attrib.get("src") for x in doc.findall(".//script")]
for src in pluralize(scripts):
@ -63,14 +70,23 @@ def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, inden
elt = existinglinks[link["href"]]
elt.attrib["rel"] = link["rel"]
else:
elt = ET.SubElement(doc.find(".//head"), "link", href=link["href"], rel=link["rel"])
elt = ET.SubElement(
doc.find(".//head"),
"link",
href=link["href"],
rel=link["rel"],
)
if linktype:
elt.attrib["type"] = linktype
if "title" in link:
elt.attrib["title"] = link["title"]
if charset:
meta_charsets = [x.attrib.get("charset") for x in doc.findall(".//meta") if x.attrib.get("charset") != None]
meta_charsets = [
x.attrib.get("charset")
for x in doc.findall(".//meta")
if x.attrib.get("charset") != None
]
if not meta_charsets:
meta = ET.SubElement(doc.find(".//head"), "meta", charset=charset)
@ -84,27 +100,83 @@ def html5tidy (doc, charset="utf-8", title=None, scripts=None, links=None, inden
etree_indent(doc)
return doc
def main(args):
p = ArgumentParser("")
p.add_argument("input", nargs="?", default=None)
p.add_argument("--indent", default=False, action="store_true")
p.add_argument("--mogrify", default=False, action="store_true", help="modify file in place")
p.add_argument("--method", default="html", help="method, default: html, values: html, xml, text")
p.add_argument(
"--mogrify",
default=False,
action="store_true",
help="modify file in place",
)
p.add_argument(
"--method",
default="html",
help="method, default: html, values: html, xml, text",
)
p.add_argument("--output", default=None, help="")
p.add_argument("--title", default=None, help="ensure/add title tag in head")
p.add_argument("--charset", default="utf-8", help="ensure/add meta tag with charset")
p.add_argument("--script", action="append", default=[], help="ensure/add script tag")
p.add_argument(
"--charset", default="utf-8", help="ensure/add meta tag with charset"
)
p.add_argument(
"--script", action="append", default=[], help="ensure/add script tag"
)
# <link>s, see https://www.w3.org/TR/html5/links.html#links
p.add_argument("--stylesheet", action="append", default=[], help="ensure/add style link")
p.add_argument("--alternate", action="append", default=[], nargs="+", help="ensure/add alternate links (optionally followed by a title and type)")
p.add_argument("--next", action="append", default=[], nargs="+", help="ensure/add alternate link")
p.add_argument("--prev", action="append", default=[], nargs="+", help="ensure/add alternate link")
p.add_argument("--search", action="append", default=[], nargs="+", help="ensure/add search link")
p.add_argument("--rss", action="append", default=[], nargs="+", help="ensure/add alternate link of type application/rss+xml")
p.add_argument("--atom", action="append", default=[], nargs="+", help="ensure/add alternate link of type application/atom+xml")
p.add_argument(
"--stylesheet",
action="append",
default=[],
help="ensure/add style link",
)
p.add_argument(
"--alternate",
action="append",
default=[],
nargs="+",
help="ensure/add alternate links (optionally followed by a title and type)",
)
p.add_argument(
"--next",
action="append",
default=[],
nargs="+",
help="ensure/add alternate link",
)
p.add_argument(
"--prev",
action="append",
default=[],
nargs="+",
help="ensure/add alternate link",
)
p.add_argument(
"--search",
action="append",
default=[],
nargs="+",
help="ensure/add search link",
)
p.add_argument(
"--rss",
action="append",
default=[],
nargs="+",
help="ensure/add alternate link of type application/rss+xml",
)
p.add_argument(
"--atom",
action="append",
default=[],
nargs="+",
help="ensure/add alternate link of type application/atom+xml",
)
args = p.parse_args(args)
links = []
def add_links(links, items, rel, _type=None):
for href in items:
d = {}
@ -128,6 +200,7 @@ def main (args):
d["href"] = href
links.append(d)
for rel in ("stylesheet", "alternate", "next", "prev", "search"):
add_links(links, getattr(args, rel), rel)
for item in args.rss:
@ -144,7 +217,13 @@ def main (args):
doc = parse(fin, treebuilder="etree", namespaceHTMLElements=False)
if fin != sys.stdin:
fin.close()
html5tidy(doc, scripts=args.script, links=links, title=args.title, indent=args.indent)
html5tidy(
doc,
scripts=args.script,
links=links,
title=args.title,
indent=args.indent,
)
# OUTPUT
tmppath = None

View File

@ -1,16 +1,19 @@
import json
import os
import re
import sys
import time
from argparse import ArgumentParser
import sys, json, re, os, time
from datetime import datetime
import dateutil.parser
from urllib.parse import urlparse, urlunparse, urlencode, quote
from urllib.request import urlopen, URLError, HTTPError
from jinja2 import FileSystemLoader, Environment
from etherpump.commands.common import *
from time import sleep
from urllib.parse import quote, urlencode, urlparse, urlunparse
from urllib.request import HTTPError, URLError, urlopen
from jinja2 import Environment, FileSystemLoader
import dateutil.parser
from etherpump.commands.common import *
"""
index:
@ -20,6 +23,7 @@ index:
"""
def group(items, key=lambda x: x):
""" returns a list of lists, of items grouped by a key function """
ret = []
@ -34,9 +38,11 @@ def group (items, key=lambda x: x):
ret.append(keys[k])
return ret
# def base (x):
# return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
def splitextlong(x):
""" split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz') """
m = re.search(r"^(.*?)(\..*)$", x)
@ -45,19 +51,23 @@ def splitextlong (x):
else:
return x, ''
def base(x):
return splitextlong(x)[0]
def excerpt(t, chars=25):
    """Return ``t`` unchanged when it has at most ``chars`` characters;
    otherwise return its first ``chars`` characters followed by "..."."""
    if len(t) <= chars:
        return t
    return t[:chars] + "..."
def absurl(url, base=None):
    """Return ``url`` as-is if it starts with "http", else prefix it with ``base``.

    When no ``base`` is given (the default), a relative ``url`` is returned
    unchanged instead of raising ``TypeError`` from ``None + url``.
    """
    if url.startswith("http") or base is None:
        return url
    return base + url
def url_base(url):
(scheme, netloc, path, params, query, fragment) = urlparse(url)
path, _ = os.path.split(path.lstrip("/"))
@ -66,6 +76,7 @@ def url_base (url):
ret += "/"
return ret
def datetimeformat(t, format='%Y-%m-%d %H:%M:%S'):
if type(t) == str:
dt = dateutil.parser.parse(t)
@ -73,38 +84,123 @@ def datetimeformat (t, format='%Y-%m-%d %H:%M:%S'):
else:
return time.strftime(format, time.localtime(t))
def main(args):
p = ArgumentParser("Convert dumped files to a document via a template.")
p.add_argument("input", nargs="+", help="Files to list (.meta.json files)")
p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in")
p.add_argument("--template", default="index.html", help="template name, built-ins include index.html, rss.xml; default: index.html")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: ./.etherdump/settings.json")
p.add_argument(
"--templatepath",
default=None,
help="path to find templates, default: built-in",
)
p.add_argument(
"--template",
default="index.html",
help="template name, built-ins include index.html, rss.xml; default: index.html",
)
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: ./.etherdump/settings.json",
)
# p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
p.add_argument("--order", default="padid", help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid")
p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)")
p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)")
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
p.add_argument(
"--order",
default="padid",
help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid",
)
p.add_argument(
"--reverse",
default=False,
action="store_true",
help="reverse order, default: False (reverse chrono)",
)
p.add_argument(
"--limit",
type=int,
default=0,
help="limit to number of items, default: 0 (no limit)",
)
p.add_argument(
"--skip",
default=None,
type=int,
help="skip this many items, default: None",
)
p.add_argument("--content", default=False, action="store_true", help="rss: include (full) content tag, default: False")
p.add_argument("--link", default="diffhtml,html,text", help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text")
p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
p.add_argument(
"--content",
default=False,
action="store_true",
help="rss: include (full) content tag, default: False",
)
p.add_argument(
"--link",
default="diffhtml,html,text",
help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text",
)
p.add_argument(
"--linkbase",
default=None,
help="base url to use for links, default: try to use the feedurl",
)
p.add_argument("--output", default=None, help="output, default: stdout")
p.add_argument("--files", default=False, action="store_true", help="include files (experimental)")
p.add_argument(
"--files",
default=False,
action="store_true",
help="include files (experimental)",
)
pg = p.add_argument_group('template variables')
pg.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml")
pg.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url")
pg.add_argument("--title", default="etherpump", help="title for document or rss feed channel title, default: etherdump")
pg.add_argument("--description", default="", help="rss: channel description, default: empty")
pg.add_argument("--language", default="en-US", help="rss: feed language, default: en-US")
pg.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
pg.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1")
pg.add_argument("--generator", default="https://gitlab.com/activearchives/etherpump", help="generator, default: https://gitlab.com/activearchives/etherdump")
pg.add_argument("--timestamp", default=None, help="timestamp, default: now (e.g. 2015-12-01 12:30:00)")
pg.add_argument(
"--feedurl",
default="feed.xml",
help="rss: to use as feeds own (self) link, default: feed.xml",
)
pg.add_argument(
"--siteurl",
default=None,
help="rss: to use as channel's site link, default: the etherpad url",
)
pg.add_argument(
"--title",
default="etherpump",
help="title for document or rss feed channel title, default: etherdump",
)
pg.add_argument(
"--description",
default="",
help="rss: channel description, default: empty",
)
pg.add_argument(
"--language", default="en-US", help="rss: feed language, default: en-US"
)
pg.add_argument(
"--updatePeriod",
default="daily",
help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily",
)
pg.add_argument(
"--updateFrequency",
default=1,
type=int,
help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1",
)
pg.add_argument(
"--generator",
default="https://gitlab.com/activearchives/etherpump",
help="generator, default: https://gitlab.com/activearchives/etherdump",
)
pg.add_argument(
"--timestamp",
default=None,
help="timestamp, default: now (e.g. 2015-12-01 12:30:00)",
)
pg.add_argument("--next", default=None, help="next link, default: None)")
pg.add_argument("--prev", default=None, help="prev link, default: None")
@ -132,12 +228,7 @@ def main (args):
def wrappath(p):
path = "./{0}".format(p)
ext = os.path.splitext(p)[1][1:]
return {
"url": path,
"path": path,
"code": 200,
"type": ext
}
return {"url": path, "path": path, "code": 200, "type": ext}
def metaforpaths(paths):
ret = {}
@ -149,7 +240,9 @@ def main (args):
mtime = os.stat(p).st_mtime
if lastedited == None or mtime > lastedited:
lastedited = mtime
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime("%Y-%m-%dT%H:%M:%S")
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime(
"%Y-%m-%dT%H:%M:%S"
)
ret["lastedited_raw"] = mtime
return ret
@ -190,7 +283,11 @@ def main (args):
return p
def has_version(padinfo, path):
return [x for x in padinfo['versions'] if 'path' in x and x['path'] == "./"+path]
return [
x
for x in padinfo['versions']
if 'path' in x and x['path'] == "./" + path
]
if args.files:
inputs = args.input
@ -215,10 +312,18 @@ def main (args):
p = get_best_pad(xbasename)
if p:
if not has_version(p, x):
print ("Grouping file {0} with pad {1}".format(x, p['padid']), file=sys.stderr)
print(
"Grouping file {0} with pad {1}".format(x, p['padid']),
file=sys.stderr,
)
p['versions'].append(wrappath(x))
else:
print ("Skipping existing version {0} ({1})...".format(x, p['padid']), file=sys.stderr)
print(
"Skipping existing version {0} ({1})...".format(
x, p['padid']
),
file=sys.stderr,
)
removelist.append(x)
# Removed Matches files
for x in removelist:
@ -242,7 +347,9 @@ def main (args):
# order items & apply limit
if args.order == "lastedited":
args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=args.reverse)
args.pads.sort(
key=lambda x: x.get("lastedited_iso"), reverse=args.reverse
)
elif args.order == "pad":
args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse)
elif args.order == "padid":
@ -250,7 +357,9 @@ def main (args):
elif args.order == "revisions":
args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse)
elif args.order == "authors":
args.pads.sort(key=lambda x: len(x.get("authors")), reverse=args.reverse)
args.pads.sort(
key=lambda x: len(x.get("authors")), reverse=args.reverse
)
else:
raise Exception("That ordering is not implemented!")

View File

@ -1,10 +1,10 @@
import json
import os
import sys
from argparse import ArgumentParser
from urllib.parse import urlencode, urlparse, urlunparse
from urllib.request import HTTPError, URLError, urlopen
from urllib.parse import urlparse, urlunparse, urlencode
from urllib.request import urlopen, URLError, HTTPError
import json, os, sys
def get_api(url, cmd=None, data=None, verbose=False):
try:
@ -34,6 +34,7 @@ def get_api(url, cmd=None, data=None, verbose=False):
# # print ("returning", resp, file=sys.stderr)
# return resp
def tryapiurl(url, verbose=False):
"""
Try to use url as api, correcting if possible.
@ -47,7 +48,9 @@ def tryapiurl (url, verbose=False):
params, query, fragment = ("", "", "")
path = path.strip("/")
# 1. try directly...
apiurl = urlunparse((scheme, netloc, path, params, query, fragment))+"/"
apiurl = (
urlunparse((scheme, netloc, path, params, query, fragment)) + "/"
)
if get_api(apiurl, "listAllPads", verbose=verbose):
return apiurl
# 2. try with += api/1.2.9
@ -60,9 +63,15 @@ def tryapiurl (url, verbose=False):
except URLError as e:
print("URLError", e, file=sys.stderr)
def main(args):
p = ArgumentParser("initialize an etherpump folder")
p.add_argument("arg", nargs="*", default=[], help="optional positional args: path etherpadurl")
p.add_argument(
"arg",
nargs="*",
default=[],
help="optional positional args: path etherpadurl",
)
p.add_argument("--path", default=None, help="path to initialize")
p.add_argument("--padurl", default=None, help="")
p.add_argument("--apikey", default=None, help="")
@ -70,7 +79,6 @@ def main(args):
p.add_argument("--reinit", default=False, action="store_true", help="")
args = p.parse_args(args)
path = args.path
if path == None and len(args.arg):
path = args.arg[0]
@ -89,7 +97,9 @@ def main(args):
with open(padinfopath) as f:
padinfo = json.load(f)
if not args.reinit:
print ("Folder is already initialized. Use --reinit to reset settings.")
print(
"Folder is already initialized. Use --reinit to reset settings."
)
sys.exit(0)
except IOError:
pass
@ -109,13 +119,18 @@ def main(args):
apikey = args.apikey
while True:
if apikey:
resp = get_api(apiurl, "listAllPads", {"apikey": apikey}, verbose=args.verbose)
resp = get_api(
apiurl, "listAllPads", {"apikey": apikey}, verbose=args.verbose
)
if resp and resp["code"] == 0:
# print ("GOOD")
break
else:
print("bad")
print ("The APIKEY is the contents of the file APIKEY.txt in the etherpad folder", file=sys.stderr)
print(
"The APIKEY is the contents of the file APIKEY.txt in the etherpad folder",
file=sys.stderr,
)
apikey = input("Please paste the APIKEY: ").strip()
padinfo["apikey"] = apikey

View File

@ -1,9 +1,11 @@
import json
import os
import re
from argparse import ArgumentParser
import json, os, re
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def group(items, key=lambda x: x):
ret = []
@ -18,6 +20,7 @@ def group (items, key=lambda x: x):
ret.append(keys[k])
return ret
def main(args):
p = ArgumentParser("")
p.add_argument("input", nargs="+", help="filenames")
@ -30,6 +33,7 @@ def main(args):
def base(x):
return re.sub(r"(\.html)|(\.diff\.html)|(\.meta\.json)|(\.txt)$", "", x)
# from pprint import pprint
# pprint()
gg = group(inputs, base)

View File

@ -1,16 +1,25 @@
from argparse import ArgumentParser
import json
import sys
from argparse import ArgumentParser
from urllib.parse import urlencode, urlparse, urlunparse
from urllib.request import HTTPError, URLError, urlopen
from etherpump.commands.common import getjson
from urllib.parse import urlparse, urlunparse, urlencode
from urllib.request import urlopen, URLError, HTTPError
def main(args):
p = ArgumentParser("call listAllPads and print the results")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="lines", help="output format: lines, json; default lines")
p.add_argument(
"--format",
default="lines",
help="output format: lines, json; default lines",
)
args = p.parse_args(args)
with open(args.padinfo) as f:

View File

@ -1,17 +1,24 @@
from argparse import ArgumentParser
import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args):
p = ArgumentParser("call listAuthorsOfPad for the padid")
p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
p.add_argument("--format", default="lines", help="output format, can be: lines, json; default: lines")
p.add_argument(
"--format",
default="lines",
help="output format, can be: lines, json; default: lines",
)
args = p.parse_args(args)
with open(args.padinfo) as f:

View File

@ -1,17 +1,20 @@
import json
import os
import re
import sys
import time
from argparse import ArgumentParser
import sys, json, re, os, time
from datetime import datetime
from time import sleep
from urllib.parse import quote, urlencode, urlparse, urlunparse
from urllib.request import HTTPError, URLError, urlopen
from jinja2 import Environment, FileSystemLoader
import dateutil.parser
import pypandoc
from urllib.parse import urlparse, urlunparse, urlencode, quote
from urllib.request import urlopen, URLError, HTTPError
from jinja2 import FileSystemLoader, Environment
from etherpump.commands.common import *
from time import sleep
import dateutil.parser
"""
publication:
@ -21,6 +24,7 @@ publication:
"""
def group(items, key=lambda x: x):
""" returns a list of lists, of items grouped by a key function """
ret = []
@ -35,9 +39,11 @@ def group (items, key=lambda x: x):
ret.append(keys[k])
return ret
# def base (x):
# return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
def splitextlong(x):
""" split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz') """
m = re.search(r"^(.*?)(\..*)$", x)
@ -46,19 +52,23 @@ def splitextlong (x):
else:
return x, ''
def base(x):
    """Return the first element of ``splitextlong(x)``.

    Per splitextlong's docstring this is the name with its "long"
    extension removed, e.g. "foo.bar.baz" -> "foo".
    """
    return splitextlong(x)[0]
def excerpt(t, chars=25):
    """Shorten `t` for display.

    Returns `t` unchanged when it is at most `chars` characters long;
    otherwise returns its first `chars` characters followed by "...".
    """
    # Only longer inputs get the ellipsis; a string of exactly `chars`
    # characters is returned as-is.
    return t[:chars] + "..." if len(t) > chars else t
def absurl(url, base=None):
    """Return `url` made absolute against `base`.

    A url that already starts with "http" is returned untouched.
    Otherwise `base` is prepended by plain string concatenation (no
    slash handling is done here). When no `base` is supplied the url is
    returned unchanged instead of failing on ``None + url``.
    """
    if url.startswith("http") or base is None:
        return url
    return base + url
def url_base(url):
(scheme, netloc, path, params, query, fragment) = urlparse(url)
path, _ = os.path.split(path.lstrip("/"))
@ -67,6 +77,7 @@ def url_base (url):
ret += "/"
return ret
def datetimeformat(t, format='%Y-%m-%d %H:%M:%S'):
if type(t) == str:
dt = dateutil.parser.parse(t)
@ -74,38 +85,123 @@ def datetimeformat (t, format='%Y-%m-%d %H:%M:%S'):
else:
return time.strftime(format, time.localtime(t))
def main(args):
p = ArgumentParser("Convert dumped files to a document via a template.")
p.add_argument("input", nargs="+", help="Files to list (.meta.json files)")
p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in")
p.add_argument("--template", default="publication.html", help="template name, built-ins include publication.html; default: publication.html")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: ./.etherdump/settings.json")
p.add_argument(
"--templatepath",
default=None,
help="path to find templates, default: built-in",
)
p.add_argument(
"--template",
default="publication.html",
help="template name, built-ins include publication.html; default: publication.html",
)
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: ./.etherdump/settings.json",
)
# p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
p.add_argument("--order", default="padid", help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid")
p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)")
p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)")
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
p.add_argument(
"--order",
default="padid",
help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid",
)
p.add_argument(
"--reverse",
default=False,
action="store_true",
help="reverse order, default: False (reverse chrono)",
)
p.add_argument(
"--limit",
type=int,
default=0,
help="limit to number of items, default: 0 (no limit)",
)
p.add_argument(
"--skip",
default=None,
type=int,
help="skip this many items, default: None",
)
p.add_argument("--content", default=False, action="store_true", help="rss: include (full) content tag, default: False")
p.add_argument("--link", default="diffhtml,html,text", help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text")
p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
p.add_argument(
"--content",
default=False,
action="store_true",
help="rss: include (full) content tag, default: False",
)
p.add_argument(
"--link",
default="diffhtml,html,text",
help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text",
)
p.add_argument(
"--linkbase",
default=None,
help="base url to use for links, default: try to use the feedurl",
)
p.add_argument("--output", default=None, help="output, default: stdout")
p.add_argument("--files", default=False, action="store_true", help="include files (experimental)")
p.add_argument(
"--files",
default=False,
action="store_true",
help="include files (experimental)",
)
pg = p.add_argument_group('template variables')
pg.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml")
pg.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url")
pg.add_argument("--title", default="etherpump", help="title for document or rss feed channel title, default: etherdump")
pg.add_argument("--description", default="", help="rss: channel description, default: empty")
pg.add_argument("--language", default="en-US", help="rss: feed language, default: en-US")
pg.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
pg.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1")
pg.add_argument("--generator", default="https://gitlab.com/activearchives/etherpump", help="generator, default: https://gitlab.com/activearchives/etherdump")
pg.add_argument("--timestamp", default=None, help="timestamp, default: now (e.g. 2015-12-01 12:30:00)")
pg.add_argument(
"--feedurl",
default="feed.xml",
help="rss: to use as feeds own (self) link, default: feed.xml",
)
pg.add_argument(
"--siteurl",
default=None,
help="rss: to use as channel's site link, default: the etherpad url",
)
pg.add_argument(
"--title",
default="etherpump",
help="title for document or rss feed channel title, default: etherdump",
)
pg.add_argument(
"--description",
default="",
help="rss: channel description, default: empty",
)
pg.add_argument(
"--language", default="en-US", help="rss: feed language, default: en-US"
)
pg.add_argument(
"--updatePeriod",
default="daily",
help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily",
)
pg.add_argument(
"--updateFrequency",
default=1,
type=int,
help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1",
)
pg.add_argument(
"--generator",
default="https://gitlab.com/activearchives/etherpump",
help="generator, default: https://gitlab.com/activearchives/etherdump",
)
pg.add_argument(
"--timestamp",
default=None,
help="timestamp, default: now (e.g. 2015-12-01 12:30:00)",
)
pg.add_argument("--next", default=None, help="next link, default: None)")
pg.add_argument("--prev", default=None, help="prev link, default: None")
@ -133,12 +229,7 @@ def main (args):
def wrappath(p):
path = "./{0}".format(p)
ext = os.path.splitext(p)[1][1:]
return {
"url": path,
"path": path,
"code": 200,
"type": ext
}
return {"url": path, "path": path, "code": 200, "type": ext}
def metaforpaths(paths):
ret = {}
@ -150,7 +241,9 @@ def main (args):
mtime = os.stat(p).st_mtime
if lastedited == None or mtime > lastedited:
lastedited = mtime
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime("%Y-%m-%dT%H:%M:%S")
ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime(
"%Y-%m-%dT%H:%M:%S"
)
ret["lastedited_raw"] = mtime
return ret
@ -191,7 +284,11 @@ def main (args):
return p
def has_version(padinfo, path):
return [x for x in padinfo['versions'] if 'path' in x and x['path'] == "./"+path]
return [
x
for x in padinfo['versions']
if 'path' in x and x['path'] == "./" + path
]
if args.files:
inputs = args.input
@ -216,10 +313,18 @@ def main (args):
p = get_best_pad(xbasename)
if p:
if not has_version(p, x):
print ("Grouping file {0} with pad {1}".format(x, p['padid']), file=sys.stderr)
print(
"Grouping file {0} with pad {1}".format(x, p['padid']),
file=sys.stderr,
)
p['versions'].append(wrappath(x))
else:
print ("Skipping existing version {0} ({1})...".format(x, p['padid']), file=sys.stderr)
print(
"Skipping existing version {0} ({1})...".format(
x, p['padid']
),
file=sys.stderr,
)
removelist.append(x)
# Removed Matches files
for x in removelist:
@ -243,7 +348,9 @@ def main (args):
# order items & apply limit
if args.order == "lastedited":
args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=args.reverse)
args.pads.sort(
key=lambda x: x.get("lastedited_iso"), reverse=args.reverse
)
elif args.order == "pad":
args.pads.sort(key=lambda x: x.get("pad"), reverse=args.reverse)
elif args.order == "padid":
@ -251,7 +358,9 @@ def main (args):
elif args.order == "revisions":
args.pads.sort(key=lambda x: x.get("revisions"), reverse=args.reverse)
elif args.order == "authors":
args.pads.sort(key=lambda x: len(x.get("authors")), reverse=args.reverse)
args.pads.sort(
key=lambda x: len(x.get("authors")), reverse=args.reverse
)
elif args.order == "custom":
# TODO: make this list non-static, but a variable that can be given from the CLI
@ -261,7 +370,8 @@ def main (args):
'nooo.relearn.activating.the.archive',
'nooo.relearn.call.for.proposals',
'nooo.relearn.call.for.proposals-proposal-footnote',
'nooo.relearn.colophon']
'nooo.relearn.colophon',
]
order = []
for x in customorder:
for pad in args.pads:

View File

@ -1,17 +1,19 @@
import json
import os
import re
import sys
from argparse import ArgumentParser
import sys, json, re, os
from datetime import datetime
from fnmatch import fnmatch
from time import sleep
from urllib.parse import quote, urlencode
from urllib.request import HTTPError, URLError, urlopen
from xml.etree import ElementTree as ET
from urllib.parse import urlencode, quote
from urllib.request import urlopen, URLError, HTTPError
import html5lib
from etherpump.commands.common import *
from time import sleep
from etherpump.commands.html5tidy import html5tidy
import html5lib
from xml.etree import ElementTree as ET
from fnmatch import fnmatch
# debugging
# import ElementTree as ET
@ -28,6 +30,7 @@ use/prefer public interfaces ? (export functions)
"""
def try_deleting(files):
for f in files:
try:
@ -35,36 +38,136 @@ def try_deleting (files):
except OSError as e:
pass
def main(args):
p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)")
p = ArgumentParser(
"Check for pads that have changed since last sync (according to .meta.json)"
)
p.add_argument("padid", nargs="*", default=[])
p.add_argument("--glob", default=False, help="download pads matching a glob pattern")
p.add_argument(
"--glob", default=False, help="download pads matching a glob pattern"
)
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherpump/settings.json")
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
p.add_argument("--pub", default="p", help="folder to store files for public pads, default: p")
p.add_argument("--group", default="g", help="folder to store files for group pads, default: g")
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
p.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False")
p.add_argument("--text", default=False, action="store_true", help="download text to PADID.txt, default: False")
p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False")
p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.diff.html, default: False")
p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False")
p.add_argument("--folder", default=False, action="store_true", help="dump files in a folder named PADID (meta, text, html, dhtml), default: False")
p.add_argument("--output", default=False, action="store_true", help="output changed padids on stdout")
p.add_argument("--force", default=False, action="store_true", help="reload, even if revisions count matches previous")
p.add_argument("--no-raw-ext", default=False, action="store_true", help="save plain text as padname with no (additional) extension")
p.add_argument("--fix-names", default=False, action="store_true", help="normalize padid's (no spaces, special control chars) for use in file names")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherpump/settings.json",
)
p.add_argument(
"--zerorevs",
default=False,
action="store_true",
help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)",
)
p.add_argument(
"--pub",
default="p",
help="folder to store files for public pads, default: p",
)
p.add_argument(
"--group",
default="g",
help="folder to store files for group pads, default: g",
)
p.add_argument(
"--skip",
default=None,
type=int,
help="skip this many items, default: None",
)
p.add_argument(
"--meta",
default=False,
action="store_true",
help="download meta to PADID.meta.json, default: False",
)
p.add_argument(
"--text",
default=False,
action="store_true",
help="download text to PADID.txt, default: False",
)
p.add_argument(
"--html",
default=False,
action="store_true",
help="download html to PADID.html, default: False",
)
p.add_argument(
"--dhtml",
default=False,
action="store_true",
help="download dhtml to PADID.diff.html, default: False",
)
p.add_argument(
"--all",
default=False,
action="store_true",
help="download all files (meta, text, html, dhtml), default: False",
)
p.add_argument(
"--folder",
default=False,
action="store_true",
help="dump files in a folder named PADID (meta, text, html, dhtml), default: False",
)
p.add_argument(
"--output",
default=False,
action="store_true",
help="output changed padids on stdout",
)
p.add_argument(
"--force",
default=False,
action="store_true",
help="reload, even if revisions count matches previous",
)
p.add_argument(
"--no-raw-ext",
default=False,
action="store_true",
help="save plain text as padname with no (additional) extension",
)
p.add_argument(
"--fix-names",
default=False,
action="store_true",
help="normalize padid's (no spaces, special control chars) for use in file names",
)
p.add_argument("--filter-ext", default=None, help="filter pads by extension")
p.add_argument(
"--filter-ext", default=None, help="filter pads by extension"
)
p.add_argument("--css", default="/styles.css", help="add css url to output pages, default: /styles.css")
p.add_argument("--script", default="/versions.js", help="add script url to output pages, default: /versions.js")
p.add_argument(
"--css",
default="/styles.css",
help="add css url to output pages, default: /styles.css",
)
p.add_argument(
"--script",
default="/versions.js",
help="add script url to output pages, default: /versions.js",
)
p.add_argument("--nopublish", default="__NOPUBLISH__", help="no publish magic word, default: __NOPUBLISH__")
p.add_argument("--publish", default="__PUBLISH__", help="the publish magic word, default: __PUBLISH__")
p.add_argument("--publish-opt-in", default=False, action="store_true", help="ensure `--publish` is honoured instead of `--nopublish`")
p.add_argument(
"--nopublish",
default="__NOPUBLISH__",
help="no publish magic word, default: __NOPUBLISH__",
)
p.add_argument(
"--publish",
default="__PUBLISH__",
help="the publish magic word, default: __PUBLISH__",
)
p.add_argument(
"--publish-opt-in",
default=False,
action="store_true",
help="ensure `--publish` is honoured instead of `--nopublish`",
)
args = p.parse_args(args)
@ -79,10 +182,14 @@ def main (args):
if args.padid:
padids = args.padid
elif args.glob:
padids = getjson(info['localapiurl']+'listAllPads?'+urlencode(data))['data']['padIDs']
padids = getjson(
info['localapiurl'] + 'listAllPads?' + urlencode(data)
)['data']['padIDs']
padids = [x for x in padids if fnmatch(x, args.glob)]
else:
padids = getjson(info['localapiurl']+'listAllPads?'+urlencode(data))['data']['padIDs']
padids = getjson(
info['localapiurl'] + 'listAllPads?' + urlencode(data)
)['data']['padIDs']
padids.sort()
numpads = len(padids)
# maxmsglen = 0
@ -110,21 +217,31 @@ def main (args):
if os.path.exists(metapath):
with open(metapath) as f:
meta.update(json.load(f))
revisions = getjson(info['localapiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
revisions = getjson(
info['localapiurl']
+ 'getRevisionsCount?'
+ urlencode(data)
)['data']['revisions']
if meta['revisions'] == revisions and not args.force:
skip = True
break
meta['padid'] = padid # .encode("utf-8")
versions = meta["versions"] = []
versions.append({
versions.append(
{
"url": padurlbase + quote(padid),
"type": "pad",
"code": 200
})
"code": 200,
}
)
if revisions == None:
meta['revisions'] = getjson(info['localapiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
meta['revisions'] = getjson(
info['localapiurl']
+ 'getRevisionsCount?'
+ urlencode(data)
)['data']['revisions']
else:
meta['revisions'] = revisions
@ -136,20 +253,36 @@ def main (args):
# todo: load more metadata!
meta['group'], meta['pad'] = splitpadname(padid)
meta['pathbase'] = p
meta['lastedited_raw'] = int(getjson(info['localapiurl']+'getLastEdited?'+urlencode(data))['data']['lastEdited'])
meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat()
meta['author_ids'] = getjson(info['localapiurl']+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
meta['lastedited_raw'] = int(
getjson(
info['localapiurl'] + 'getLastEdited?' + urlencode(data)
)['data']['lastEdited']
)
meta['lastedited_iso'] = datetime.fromtimestamp(
int(meta['lastedited_raw']) / 1000
).isoformat()
meta['author_ids'] = getjson(
info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
)['data']['authorIDs']
break
except HTTPError as e:
tries += 1
if tries > 3:
print ("Too many failures ({0}), skipping".format(padid), file=sys.stderr)
print(
"Too many failures ({0}), skipping".format(padid),
file=sys.stderr,
)
skip = True
break
else:
sleep(3)
except TypeError as e:
print ("Type Error loading pad {0} (phantom pad?), skipping".format(padid), file=sys.stderr)
print(
"Type Error loading pad {0} (phantom pad?), skipping".format(
padid
),
file=sys.stderr,
)
skip = True
break
@ -180,14 +313,28 @@ def main (args):
##########################################
if args.nopublish and args.nopublish in text:
# NEED TO PURGE ANY EXISTING DOCS
try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json"))
try_deleting(
(
p + raw_ext,
p + ".raw.html",
p + ".diff.html",
p + ".meta.json",
)
)
continue
##########################################
## ENFORCE __PUBLISH__ MAGIC WORD
##########################################
if args.publish_opt_in and args.publish not in text:
try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json"))
try_deleting(
(
p + raw_ext,
p + ".raw.html",
p + ".diff.html",
p + ".meta.json",
)
)
continue
ver["path"] = p + raw_ext
@ -202,21 +349,58 @@ def main (args):
links.append({"href": args.css, "rel": "stylesheet"})
# todo, make this process reflect which files actually were made
versionbaseurl = quote(padid)
links.append({"href":versions[0]["url"], "rel":"alternate", "type":"text/html", "title":"Etherpad"})
links.append(
{
"href": versions[0]["url"],
"rel": "alternate",
"type": "text/html",
"title": "Etherpad",
}
)
if args.all or args.text:
links.append({"href":versionbaseurl+raw_ext, "rel":"alternate", "type":"text/plain", "title":"Plain text"})
links.append(
{
"href": versionbaseurl + raw_ext,
"rel": "alternate",
"type": "text/plain",
"title": "Plain text",
}
)
if args.all or args.html:
links.append({"href":versionbaseurl+".raw.html", "rel":"alternate", "type":"text/html", "title":"HTML"})
links.append(
{
"href": versionbaseurl + ".raw.html",
"rel": "alternate",
"type": "text/html",
"title": "HTML",
}
)
if args.all or args.dhtml:
links.append({"href":versionbaseurl+".diff.html", "rel":"alternate", "type":"text/html", "title":"HTML with author colors"})
links.append(
{
"href": versionbaseurl + ".diff.html",
"rel": "alternate",
"type": "text/html",
"title": "HTML with author colors",
}
)
if args.all or args.meta:
links.append({"href":versionbaseurl+".meta.json", "rel":"alternate", "type":"application/json", "title":"Meta data"})
links.append(
{
"href": versionbaseurl + ".meta.json",
"rel": "alternate",
"type": "application/json",
"title": "Meta data",
}
)
# links.append({"href":"/", "rel":"search", "type":"text/html", "title":"Index"})
if args.all or args.dhtml:
data['startRev'] = "0"
html = getjson(info['localapiurl']+'createDiffHTML?'+urlencode(data))
html = getjson(
info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
)
ver = {"type": "diffhtml"}
versions.append(ver)
ver["code"] = html["_code"]
@ -226,11 +410,22 @@ def main (args):
ver["path"] = p + ".diff.html"
ver["url"] = quote(ver["path"])
# doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
doc = html5lib.parse(
html, treebuilder="etree", namespaceHTMLElements=False
)
html5tidy(
doc,
indent=True,
title=padid,
scripts=args.script,
links=links,
)
with open(ver["path"], "w") as f:
# f.write(html.encode("utf-8"))
print(ET.tostring(doc, method="html", encoding="unicode"), file=f)
print(
ET.tostring(doc, method="html", encoding="unicode"),
file=f,
)
except TypeError:
# Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file!
ver["message"] = html["message"]
@ -247,11 +442,22 @@ def main (args):
html = html['data']['html']
ver["path"] = p + ".raw.html"
ver["url"] = quote(ver["path"])
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
doc = html5lib.parse(
html, treebuilder="etree", namespaceHTMLElements=False
)
html5tidy(
doc,
indent=True,
title=padid,
scripts=args.script,
links=links,
)
with open(ver["path"], "w") as f:
# f.write(html.encode("utf-8"))
print (ET.tostring(doc, method="html", encoding="unicode"), file=f)
print(
ET.tostring(doc, method="html", encoding="unicode"),
file=f,
)
# output meta
if args.all or args.meta:

View File

@ -1,14 +1,18 @@
from argparse import ArgumentParser
import json
from argparse import ArgumentParser
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
def main(args):
p = ArgumentParser("call getRevisionsCount for the given padid")
p.add_argument("padid", help="the padid")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
args = p.parse_args(args)

View File

@ -1,23 +1,40 @@
import json
import sys
from argparse import ArgumentParser
import json, sys
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
import requests
LIMIT_BYTES = 100 * 1000
def main(args):
p = ArgumentParser("calls the setHTML API function for the given padid")
p.add_argument("padid", help="the padid")
p.add_argument("--html", default=None, help="html, default: read from stdin")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--html", default=None, help="html, default: read from stdin"
)
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
# p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument("--create", default=False, action="store_true", help="flag to create pad if necessary")
p.add_argument("--limit", default=False, action="store_true", help="limit text to 100k (etherpad limit)")
p.add_argument(
"--create",
default=False,
action="store_true",
help="flag to create pad if necessary",
)
p.add_argument(
"--limit",
default=False,
action="store_true",
help="limit text to 100k (etherpad limit)",
)
args = p.parse_args(args)
with open(args.padinfo) as f:
@ -31,7 +48,11 @@ def main(args):
createPad = False
if args.create:
# check if it's in fact necessary
requesturl = apiurl+'getRevisionsCount?'+urlencode({'apikey': info['apikey'], 'padID': args.padid})
requesturl = (
apiurl
+ 'getRevisionsCount?'
+ urlencode({'apikey': info['apikey'], 'padID': args.padid})
)
results = json.load(urlopen(requesturl))
print(json.dumps(results, indent=2), file=sys.stderr)
if results['code'] != 0:
@ -50,7 +71,9 @@ def main(args):
requesturl = apiurl + 'createPad'
if args.showurl:
print(requesturl)
results = requests.post(requesturl, params=params, data={'text': ''}) # json.load(urlopen(requesturl))
results = requests.post(
requesturl, params=params, data={'text': ''}
) # json.load(urlopen(requesturl))
results = json.loads(results.text)
print(json.dumps(results, indent=2))
@ -62,6 +85,10 @@ def main(args):
if args.showurl:
print(requesturl)
# params['html'] = html
results = requests.post(requesturl, params={'apikey': info['apikey']}, data={'apikey': info['apikey'], 'padID': args.padid, 'html': html}) # json.load(urlopen(requesturl))
results = requests.post(
requesturl,
params={'apikey': info['apikey']},
data={'apikey': info['apikey'], 'padID': args.padid, 'html': html},
) # json.load(urlopen(requesturl))
results = json.loads(results.text)
print(json.dumps(results, indent=2))

View File

@ -1,24 +1,39 @@
import json
import sys
from argparse import ArgumentParser
import json, sys
from urllib.parse import urlencode, quote
from urllib.request import urlopen, URLError, HTTPError
from urllib.parse import quote, urlencode
from urllib.request import HTTPError, URLError, urlopen
import requests
LIMIT_BYTES = 100 * 1000
def main(args):
p = ArgumentParser("calls the getText API function for the given padid")
p.add_argument("padid", help="the padid")
p.add_argument("--text", default=None, help="text, default: read from stdin")
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument(
"--text", default=None, help="text, default: read from stdin"
)
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument("--showurl", default=False, action="store_true")
# p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
p.add_argument("--create", default=False, action="store_true", help="flag to create pad if necessary")
p.add_argument("--limit", default=False, action="store_true", help="limit text to 100k (etherpad limit)")
p.add_argument(
"--create",
default=False,
action="store_true",
help="flag to create pad if necessary",
)
p.add_argument(
"--limit",
default=False,
action="store_true",
help="limit text to 100k (etherpad limit)",
)
args = p.parse_args(args)
with open(args.padinfo) as f:
@ -55,8 +70,14 @@ def main(args):
if args.showurl:
print(requesturl)
results = requests.post(requesturl, params=data) # json.load(urlopen(requesturl))
results = requests.post(
requesturl, params=data
) # json.load(urlopen(requesturl))
results = json.loads(results.text)
if results['code'] != 0:
print ("setText: ERROR ({0}) on pad {1}: {2}".format(results['code'], args.padid, results['message']))
print(
"setText: ERROR ({0}) on pad {1}: {2}".format(
results['code'], args.padid, results['message']
)
)
# json.dumps(results, indent=2)

View File

@ -1,17 +1,25 @@
import json
import re
import sys
from argparse import ArgumentParser
import json, sys, re
from .common import *
"""
Extract and output selected fields of metadata
"""
def main(args):
p = ArgumentParser("extract & display meta data from a specific .meta.json file, or for a given padid (nb: it still looks for a .meta.json file)")
p = ArgumentParser(
"extract & display meta data from a specific .meta.json file, or for a given padid (nb: it still looks for a .meta.json file)"
)
p.add_argument("--path", default=None, help="read from a meta.json file")
p.add_argument("--padid", default=None, help="read meta for this padid")
p.add_argument("--format", default="{padid}", help="format str, default: {padid}")
p.add_argument(
"--format", default="{padid}", help="format str, default: {padid}"
)
args = p.parse_args(args)
path = args.path
@ -28,4 +36,3 @@ def main (args):
formatstr = args.format.decode("utf-8")
formatstr = re.sub(r"{(\w+)}", r"{0[\1]}", formatstr)
print(formatstr.format(meta).encode("utf-8"))

View File

@ -1,13 +1,17 @@
import json
import os
import re
import sys
from argparse import ArgumentParser
import sys, json, re, os
from datetime import datetime
from math import ceil, floor
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
from math import ceil, floor
from .common import *
"""
status (meta):
Update meta data files for those that have changed.
@ -22,10 +26,12 @@ complicates the "syncing" idea....
"""
class PadItemException(Exception):
    """Exception type declared alongside PadItem.

    NOTE(review): presumably raised by PadItem; the raise sites are not
    visible in this part of the file, so confirm before relying on it.
    """
class PadItem ():
class PadItem:
def __init__(self, padid=None, path=None, padexists=False):
self.padexists = padexists
if padid and path:
@ -55,23 +61,74 @@ class PadItem ():
def fileexists(self):
return os.path.exists(self.path)
def ignore_p(path, settings=None):
    """Return True when `path` starts with a dot, False otherwise.

    `settings` is accepted for interface compatibility but is not
    consulted by this function.
    """
    # Always return a real bool rather than True / implicit None.
    return path.startswith(".")
def main(args):
p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)")
p = ArgumentParser(
"Check for pads that have changed since last sync (according to .meta.json)"
)
# p.add_argument("padid", nargs="*", default=[])
p.add_argument("--padinfo", default=".etherpump/settings.json", help="settings, default: .etherdump/settings.json")
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
p.add_argument("--pub", default=".", help="folder to store files for public pads, default: pub")
p.add_argument("--group", default="g", help="folder to store files for group pads, default: g")
p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
p.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False")
p.add_argument("--text", default=False, action="store_true", help="download text to PADID.txt, default: False")
p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False")
p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.dhtml, default: False")
p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False")
p.add_argument(
"--padinfo",
default=".etherpump/settings.json",
help="settings, default: .etherdump/settings.json",
)
p.add_argument(
"--zerorevs",
default=False,
action="store_true",
help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)",
)
p.add_argument(
"--pub",
default=".",
help="folder to store files for public pads, default: pub",
)
p.add_argument(
"--group",
default="g",
help="folder to store files for group pads, default: g",
)
p.add_argument(
"--skip",
default=None,
type=int,
help="skip this many items, default: None",
)
p.add_argument(
"--meta",
default=False,
action="store_true",
help="download meta to PADID.meta.json, default: False",
)
p.add_argument(
"--text",
default=False,
action="store_true",
help="download text to PADID.txt, default: False",
)
p.add_argument(
"--html",
default=False,
action="store_true",
help="download html to PADID.html, default: False",
)
p.add_argument(
"--dhtml",
default=False,
action="store_true",
help="download dhtml to PADID.dhtml, default: False",
)
p.add_argument(
"--all",
default=False,
action="store_true",
help="download all files (meta, text, html, dhtml), default: False",
)
args = p.parse_args(args)
info = loadpadinfo(args.padinfo)
@ -81,7 +138,9 @@ def main (args):
padsbypath = {}
# listAllPads
padids = getjson(info['apiurl']+'listAllPads?'+urlencode(data))['data']['padIDs']
padids = getjson(info['apiurl'] + 'listAllPads?' + urlencode(data))['data'][
'padIDs'
]
padids.sort()
for padid in padids:
pad = PadItem(padid=padid, padexists=True)

12
pyproject.toml Normal file
View File

@ -0,0 +1,12 @@
# Build requirements and backend used when building the package.
[build-system]
requires = [
"setuptools>=41.0.0",
"setuptools-scm",
"wheel",
]
build-backend = "setuptools.build_meta"
# Black formatter settings.
[tool.black]
# Same limit as max-line-length (flake8) and line_length (isort) in setup.cfg.
line-length = 80
target-version = ['py35', 'py36', 'py37']
# Leave existing string quote style untouched when formatting.
skip-string-normalization = true

9
setup.cfg Normal file
View File

@ -0,0 +1,9 @@
[flake8]
# Keep in step with Black's line-length in pyproject.toml.
max-line-length = 80
# E203 (whitespace before ':') conflicts with the way Black formats slices.
extend-ignore = E203

[isort]
known_first_party = etherpump
# Keep in step with Black's line-length in pyproject.toml.
line_length = 80
# Black-compatible import wrapping: vertical hanging indent, trailing
# comma, parentheses instead of backslash continuations.
multi_line_output = 3
include_trailing_comma = True
force_grid_wrap = 0
use_parentheses = True
skip = .venv

View File

@ -1,8 +1,9 @@
#!/usr/bin/env python3
from etherpump import VERSION
from setuptools import find_packages, setup
from etherpump import VERSION
with open('README.md', 'r') as handle:
long_description = handle.read()