This commit is contained in:
Michael Murtaugh 2018-01-12 14:42:55 +01:00
parent 70ae27fe1f
commit 8df9c56ac1
21 changed files with 223 additions and 105 deletions

View File

@ -66,3 +66,11 @@ Originally designed for use at: [constant](http://etherdump.constantvzw.org/).
-----------------------------------------------
Preparations for [Machine Research](https://machineresearch.wordpress.com/) [2](http://constantvzw.org/site/Machine-Research,2646.html)
6 Oct 2017
----------------------
Feature request from PW: When deleting a previously public document, generate a page (or pages) with an explanation, along the lines of "This document was previously public but has been marked ...", and maybe give links to search.
3 Nov 2017
---------------
machineresearch seems to be __NOPUBLISH__ but still exists (also in recentchanges)

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python
#!/usr/bin/env python3
from __future__ import print_function
import sys
usage = """Usage:
@ -31,13 +32,13 @@ try:
else:
args = sys.argv[2:]
except IndexError:
print usage
print (usage)
sys.exit(0)
try:
# http://stackoverflow.com/questions/301134/dynamic-module-import-in-python
cmdmod = __import__("etherdump.commands.%s" % cmd, fromlist=["etherdump.commands"])
cmdmod.main(args)
except ImportError, e:
print "Error performing command '{0}'\n(python said: {1})\n".format(cmd, e)
print usage
except ImportError as e:
print ("Error performing command '{0}'\n(python said: {1})\n".format(cmd, e))
print (usage)

View File

@ -1,10 +1,22 @@
from __future__ import print_function
import re, os, json, sys
from urllib import quote_plus, unquote_plus
from math import ceil, floor
from urllib2 import urlopen, HTTPError
from time import sleep
try:
# python2
from urlparse import urlparse, urlunparse
from urllib2 import urlopen, URLError, HTTPError
from urllib import urlencode
from urllib import quote_plus, unquote_plus
from htmlentitydefs import name2codepoint
input = raw_input
except ImportError:
# python3
from urllib.parse import urlparse, urlunparse, urlencode, quote_plus, unquote_plus
from urllib.request import urlopen, URLError, HTTPError
from html.entities import name2codepoint
groupnamepat = re.compile(r"^g\.(\w+)\$")
def splitpadname (padid):
@ -17,15 +29,19 @@ def splitpadname (padid):
def padurl (padid, ):
return padid
def padpath (padid, pub_path=u"", group_path=u""):
def padpath (padid, pub_path=u"", group_path=u"", normalize=False):
g, p = splitpadname(padid)
if type(g) == unicode:
g = g.encode("utf-8")
if type(p) == unicode:
p = p.encode("utf-8")
# if type(g) == unicode:
# g = g.encode("utf-8")
# if type(p) == unicode:
# p = p.encode("utf-8")
p = quote_plus(p)
# p = p.replace(" ", "_")
# p = p.replace("*", "-")
if normalize:
p = p.replace(" ", "_")
p = p.replace("(", "")
p = p.replace(")", "")
p = p.replace("?", "")
p = p.replace("'", "")
if g:
return os.path.join(group_path, g, p)
else:
@ -49,6 +65,7 @@ def getjson (url, max_retry=3, retry_sleep_time=0.5):
try:
f = urlopen(url)
data = f.read()
data = data.decode("utf-8")
rurl = f.geturl()
f.close()
ret.update(json.loads(data))
@ -77,13 +94,12 @@ def progressbar (i, num, label="", file=sys.stderr):
bars = int(ceil(p*20))
bar = ("*"*bars) + ("-"*(20-bars))
msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), num, label)
sys.stderr.write(msg.encode("utf-8"))
sys.stderr.write(msg)
sys.stderr.flush()
# Python developer Fredrik Lundh (author of elementtree, among other things) has such a function on his website, which works with decimal, hex and named entities:
import re, htmlentitydefs
##
# Removes HTML or XML character references and entities from a text string.
#
@ -104,7 +120,7 @@ def unescape(text):
else:
# named entity
try:
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
text = unichr(name2codepoint[text[1:-1]])
except KeyError:
pass
return text # leave as is

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json
from urllib import urlencode
@ -27,13 +26,13 @@ def main(args):
data['rev'] = args.rev
requesturl = apiurl+'createDiffHTML?'+urlencode(data)
if args.showurl:
print requesturl
print (requesturl)
else:
try:
results = json.load(urlopen(requesturl))['data']
if args.format == "json":
print json.dumps(results)
print (json.dumps(results))
else:
print results['html'].encode("utf-8")
print (results['html'].encode("utf-8"))
except HTTPError as e:
pass

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json
from urllib import urlencode
@ -23,11 +22,11 @@ def main(args):
data['padID'] = args.padid # is utf-8 encoded
requesturl = apiurl+'deletePad?'+urlencode(data)
if args.showurl:
print requesturl
print (requesturl)
else:
results = json.load(urlopen(requesturl))
if args.format == "json":
print json.dumps(results)
print (json.dumps(results))
else:
if results['data']:
print results['data']['text'].encode("utf-8")
print (results['data']['text'].encode("utf-8"))

View File

@ -1,4 +1,3 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import sys, json, re

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json
from urllib import urlencode
@ -26,10 +25,10 @@ def main(args):
data['rev'] = args.rev
requesturl = apiurl+'getHTML?'+urlencode(data)
if args.showurl:
print requesturl
print (requesturl)
else:
results = json.load(urlopen(requesturl))['data']
if args.format == "json":
print json.dumps(results)
print (json.dumps(results))
else:
print results['html'].encode("utf-8")
print (results['html'].encode("utf-8"))

View File

@ -1,9 +1,14 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json, sys
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
try:
# python2
from urllib2 import urlopen, URLError, HTTPError
from urllib import urlencode
except ImportError:
# python3
from urllib.parse import urlencode
from urllib.request import urlopen, URLError, HTTPError
def main(args):
@ -26,11 +31,13 @@ def main(args):
data['rev'] = args.rev
requesturl = apiurl+'getText?'+urlencode(data)
if args.showurl:
print requesturl
print (requesturl)
else:
results = json.load(urlopen(requesturl))
resp = urlopen(requesturl).read()
resp = resp.decode("utf-8")
results = json.loads(resp)
if args.format == "json":
print json.dumps(results)
print (json.dumps(results))
else:
if results['data']:
sys.stdout.write(results['data']['text'].encode("utf-8"))
sys.stdout.write(results['data']['text'])

View File

@ -1,4 +1,3 @@
#!/usr/bin/env python
from __future__ import print_function
from html5lib import parse
import os, sys

View File

@ -1,4 +1,3 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import sys, json, re, os, urlparse

View File

@ -1,8 +1,17 @@
from __future__ import print_function
from argparse import ArgumentParser
from urlparse import urlparse, urlunparse
from urllib2 import urlopen, URLError, HTTPError
from urllib import urlencode
try:
# python2
from urlparse import urlparse, urlunparse
from urllib2 import urlopen, URLError, HTTPError
from urllib import urlencode
input = raw_input
except ImportError:
# python3
from urllib.parse import urlparse, urlunparse, urlencode
from urllib.request import urlopen, URLError, HTTPError
import json, os, sys
def get_api(url, cmd=None, data=None, verbose=False):
@ -13,7 +22,9 @@ def get_api(url, cmd=None, data=None, verbose=False):
# data['apikey'] = "7c8faa070c97f83d8f705c935a32d5141f89cbaa2158042fa92e8ddad5dbc5e1"
if verbose:
print ("trying", useurl, file=sys.stderr)
resp = json.load(urlopen(useurl))
resp = urlopen(useurl).read()
resp = resp.decode("utf-8")
resp = json.loads(resp)
if "code" in resp and "message" in resp:
return resp
except ValueError as e:
@ -25,10 +36,11 @@ def get_api(url, cmd=None, data=None, verbose=False):
print (" HTTPError", e, file=sys.stderr)
if e.code == 401:
# Unauthorized is how the API responds to an incorrect API key
resp = json.load(e)
if "code" in resp and "message" in resp:
# print ("returning", resp, file=sys.stderr)
return resp
return {"code": 401, "message": e}
# resp = json.load(e)
# if "code" in resp and "message" in resp:
# # print ("returning", resp, file=sys.stderr)
# return resp
def tryapiurl (url, verbose=False):
"""
@ -100,7 +112,7 @@ def main(args):
if apiurl:
# print ("Got APIURL: {0}".format(apiurl))
break
apiurl = raw_input("Please type the URL of the etherpad: ").strip()
apiurl = input("Please type the URL of the etherpad: ").strip()
padinfo["apiurl"] = apiurl
apikey = args.apikey
while True:
@ -112,7 +124,7 @@ def main(args):
else:
print ("bad")
print ("The APIKEY is the contents of the file APIKEY.txt in the etherpad folder", file=sys.stderr)
apikey = raw_input("Please paste the APIKEY: ").strip()
apikey = input("Please paste the APIKEY: ").strip()
padinfo["apikey"] = apikey
with open(padinfopath, "w") as f:

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json, os, re
from urllib import urlencode
@ -41,5 +40,5 @@ def main(args):
pass
for i in items:
newloc = os.path.join(itembase, i)
print "'{0}' => '{1}'".format(i, newloc)
print ("'{0}' => '{1}'".format(i, newloc))
os.rename(i, newloc)

View File

@ -1,10 +1,18 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from common import getjson
import sys
from etherdump.commands.common import getjson
try:
# python2
from urlparse import urlparse, urlunparse
from urllib2 import urlopen, URLError, HTTPError
from urllib import urlencode
input = raw_input
except ImportError:
# python3
from urllib.parse import urlparse, urlunparse, urlencode
from urllib.request import urlopen, URLError, HTTPError
def main (args):
p = ArgumentParser("call listAllPads and print the results")
@ -21,12 +29,12 @@ def main (args):
data['apikey'] = info['apikey']
requesturl = apiurl+'listAllPads?'+urlencode(data)
if args.showurl:
print requesturl
print (requesturl)
else:
results = getjson(requesturl)['data']['padIDs']
if args.format == "json":
print json.dumps(results)
print (json.dumps(results))
else:
for r in results:
print r.encode("utf-8")
print (r)

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json
from urllib import urlencode
@ -22,11 +21,11 @@ def main(args):
data['padID'] = args.padid.encode("utf-8")
requesturl = apiurl+'listAuthorsOfPad?'+urlencode(data)
if args.showurl:
print requesturl
print (requesturl)
else:
results = json.load(urlopen(requesturl))['data']['authorIDs']
if args.format == "json":
print json.dumps(results)
print (json.dumps(results))
else:
for r in results:
print r.encode("utf-8")
print (r.encode("utf-8"))

View File

@ -1,13 +1,20 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import sys, json, re, os
from datetime import datetime
from urllib import urlencode, quote
from urllib2 import HTTPError
from common import *
try:
# python2
from urllib2 import urlopen, URLError, HTTPError
from urllib import urlencode
except ImportError:
# python3
from urllib.parse import urlencode, quote
from urllib.request import urlopen, URLError, HTTPError
from etherdump.commands.common import *
from time import sleep
from html5tidy import html5tidy
from etherdump.commands.html5tidy import html5tidy
import html5lib
from xml.etree import ElementTree as ET
# debugging
@ -49,6 +56,9 @@ def main (args):
p.add_argument("--output", default=False, action="store_true", help="output changed padids on stdout")
p.add_argument("--force", default=False, action="store_true", help="reload, even if revisions count matches previous")
p.add_argument("--no-raw-ext", default=False, action="store_true", help="save plain text as padname with no (additional) extension")
p.add_argument("--fix-names", default=False, action="store_true", help="normalize padid's (no spaces, special control chars) for use in file names")
p.add_argument("--filter-ext", default=None, help="filter pads by extension")
p.add_argument("--css", default="/styles.css", help="add css url to output pages, default: /styles.css")
p.add_argument("--script", default="/versions.js", help="add script url to output pages, default: /versions.js")
@ -79,7 +89,7 @@ def main (args):
progressbar(i, numpads, padid)
data['padID'] = padid.encode("utf-8")
p = padpath(padid, args.pub, args.group)
p = padpath(padid, args.pub, args.group, args.fix_names)
if args.folder:
p = os.path.join(p, padid.encode("utf-8"))
@ -89,8 +99,8 @@ def main (args):
skip = False
padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
meta = {}
if type(padurlbase) == unicode:
padurlbase = padurlbase.encode("utf-8")
# if type(padurlbase) == unicode:
# padurlbase = padurlbase.encode("utf-8")
while True:
try:
if os.path.exists(metapath):
@ -101,10 +111,10 @@ def main (args):
skip=True
break
meta['padid'] = padid.encode("utf-8")
meta['padid'] = padid # .encode("utf-8")
versions = meta["versions"] = []
versions.append({
"url": padurlbase + quote(padid.encode("utf-8")), # this quote was really important for dealing with rogue chars like \xa0 in a padid;
"url": padurlbase + quote(padid),
"type": "pad",
"code": 200
})
@ -129,13 +139,13 @@ def main (args):
except HTTPError as e:
tries += 1
if tries > 3:
print ("Too many failures ({0}), skipping".format(padid).encode("utf-8"), file=sys.stderr)
print ("Too many failures ({0}), skipping".format(padid), file=sys.stderr)
skip=True
break
else:
sleep(3)
except TypeError as e:
print ("Type Error loading pad {0} (phantom pad?), skipping".format(padid).encode("utf-8"), file=sys.stderr)
print ("Type Error loading pad {0} (phantom pad?), skipping".format(padid), file=sys.stderr)
skip=True
break
@ -145,7 +155,7 @@ def main (args):
count += 1
if args.output:
print (padid.encode("utf-8"))
print (padid)
if args.all or (args.meta or args.text or args.html or args.dhtml):
try:
@ -172,7 +182,7 @@ def main (args):
ver["path"] = p+raw_ext
ver["url"] = quote(ver["path"])
with open(ver["path"], "w") as f:
f.write(text.encode("utf-8"))
f.write(text)
# once the content is settled, compute a hash
# and link it in the metadata!
@ -180,7 +190,7 @@ def main (args):
if args.css:
links.append({"href":args.css, "rel":"stylesheet"})
# todo, make this process reflect which files actually were made
versionbaseurl = quote(padid.encode("utf-8"))
versionbaseurl = quote(padid)
links.append({"href":versions[0]["url"], "rel":"alternate", "type":"text/html", "title":"Etherpad"})
links.append({"href":versionbaseurl+raw_ext, "rel":"alternate", "type":"text/plain", "title":"Plain text"})
links.append({"href":versionbaseurl+".raw.html", "rel":"alternate", "type":"text/html", "title":"HTML"})
@ -198,11 +208,12 @@ def main (args):
html = html['data']['html']
ver["path"] = p+".diff.html"
ver["url"] = quote(ver["path"])
doc = html5lib.parse(html.encode("utf-8"), treebuilder="etree", encoding="utf-8", namespaceHTMLElements=False)
# doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
with open(ver["path"], "w") as f:
# f.write(html.encode("utf-8"))
print(ET.tostring(doc, method="html", encoding="utf-8"), file=f)
print(ET.tostring(doc, method="html", encoding="unicode"), file=f)
# Process text, html, dhtml, all options
if args.all or args.html:
@ -218,7 +229,7 @@ def main (args):
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
with open(ver["path"], "w") as f:
# f.write(html.encode("utf-8"))
print (ET.tostring(doc, method="html", encoding="utf-8"), file=f)
print (ET.tostring(doc, method="html", encoding="unicode"), file=f)
# output meta
if args.all or args.meta:

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json
from urllib import urlencode
@ -20,7 +19,7 @@ def main(args):
data['padID'] = args.padid.encode("utf-8")
requesturl = apiurl+'getRevisionsCount?'+urlencode(data)
if args.showurl:
print requesturl
print (requesturl)
else:
results = json.load(urlopen(requesturl))['data']['revisions']
print results
print (results)

View File

@ -0,0 +1,66 @@
from __future__ import print_function
from argparse import ArgumentParser
import json, sys
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
import requests
LIMIT_BYTES = 100*1000
def main(args):
    """Call the etherpad setHTML API function for the given padid.

    Parses the command-line style argument list ``args``, loads the API
    settings (``apiurl`` and ``apikey``) from the JSON file named by
    ``--padinfo``, and posts the HTML to the pad. The HTML comes from
    ``--html`` when given, otherwise it is read from stdin.

    With ``--create`` the pad's revision count is requested first; if the
    API answers with a non-zero ``code`` the pad is created (with empty
    text) before the HTML is set. With ``--limit`` the HTML is truncated to
    ``LIMIT_BYTES`` characters before it is sent.

    The JSON responses of createPad and setHTML are printed to stdout; the
    getRevisionsCount response and the "limiting" notice go to stderr.
    """
    p = ArgumentParser("calls the setHTML API function for the given padid")
    p.add_argument("padid", help="the padid")
    p.add_argument("--html", default=None, help="html, default: read from stdin")
    p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
    p.add_argument("--showurl", default=False, action="store_true")
    p.add_argument("--create", default=False, action="store_true", help="flag to create pad if necessary")
    p.add_argument("--limit", default=False, action="store_true", help="limit text to 100k (etherpad limit)")
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = info.get("apiurl")

    createPad = False
    if args.create:
        # check if it's in fact necessary: a non-zero code from
        # getRevisionsCount is taken to mean the pad does not exist yet
        requesturl = apiurl+'getRevisionsCount?'+urlencode({'apikey': info['apikey'], 'padID': args.padid})
        # read + decode explicitly: urlopen returns bytes on python3
        resp = urlopen(requesturl).read()
        resp = resp.decode("utf-8")
        results = json.loads(resp)
        print (json.dumps(results, indent=2), file=sys.stderr)
        if results['code'] != 0:
            createPad = True

    if args.html:
        html = args.html
    else:
        html = sys.stdin.read()

    params = {}
    params['apikey'] = info['apikey']
    params['padID'] = args.padid

    if createPad:
        requesturl = apiurl+'createPad'
        if args.showurl:
            print (requesturl)
        results = requests.post(requesturl, params=params, data={'text': ''})
        results = json.loads(results.text)
        print (json.dumps(results, indent=2))

    if len(html) > LIMIT_BYTES and args.limit:
        # report the size of the html being truncated (the previous code
        # referenced an undefined name here and raised NameError)
        print ("limiting", len(html), LIMIT_BYTES, file=sys.stderr)
        html = html[:LIMIT_BYTES]

    requesturl = apiurl+'setHTML'
    if args.showurl:
        print (requesturl)
    # the html travels in the POST body rather than the query string
    results = requests.post(requesturl, params={'apikey': info['apikey']}, data={'apikey': info['apikey'], 'padID': args.padid, 'html': html})
    results = json.loads(results.text)
    print (json.dumps(results, indent=2))

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json, sys
from urllib import urlencode
@ -32,7 +31,7 @@ def main(args):
if args.create:
requesturl = apiurl+'getRevisionsCount?'+urlencode(data)
results = json.load(urlopen(requesturl))
# print json.dumps(results, indent=2)
# print (json.dumps(results, indent=2))
if results['code'] != 0:
createPad = True
@ -42,7 +41,7 @@ def main(args):
text = sys.stdin.read()
if len(text) > LIMIT_BYTES and args.limit:
print "limiting", len(text), LIMIT_BYTES
print ("limiting", len(text), LIMIT_BYTES)
text = text[:LIMIT_BYTES]
data['text'] = text
@ -53,8 +52,9 @@ def main(args):
requesturl = apiurl+'setText'
if args.showurl:
print requesturl
else:
results = requests.post(requesturl, data=data) # json.load(urlopen(requesturl))
results = json.loads(results.text)
print json.dumps(results, indent=2)
print (requesturl)
results = requests.post(requesturl, params=data) # json.load(urlopen(requesturl))
results = json.loads(results.text)
if results['code'] != 0:
print (u"setText: ERROR ({0}) on pad {1}: {2}".format(results['code'], args.padid, results['message']).encode("utf-8"))
# json.dumps(results, indent=2)

View File

@ -1,4 +1,3 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json, sys, re
@ -27,6 +26,6 @@ def main (args):
meta = json.load(f)
formatstr = args.format.decode("utf-8")
formatstr = re.sub(ur"{(\w+)}", r"{0[\1]}", formatstr)
formatstr = re.sub(r"{(\w+)}", r"{0[\1]}", formatstr)
print (formatstr.format(meta).encode("utf-8"))

View File

@ -1,4 +1,3 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import sys, json, re, os

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
import distutils.command.install_lib
from distutils.core import setup
import os
@ -26,7 +26,7 @@ setup(
#package_data={'activearchives': find("activearchives", "templates/") + find("activearchives", "data/")},
package_data={'etherdump': find("etherdump", "data/")},
scripts=['bin/etherdump'],
url='http://activearchives.org/wiki/Etherdump/',
url='http://activearchives.org/wiki/Etherdump',
license='LICENSE.txt',
description='Etherdump an etherpad publishing & archiving system',
# long_description=open('README.md').read(),