new pull, new meta style from live constant etherdump
This commit is contained in:
parent 8d5ebd6f01
commit 2f1c5603e2

@@ -56,4 +56,5 @@ etherdump sync

 why
 -------
 Etherdump is useful as a means of dumping the contents of etherpad to files, as a way of opening up the contents of the service to other services / methods / uses / tools / situations. (Files also of course allow for archival tools / methods)
+
@@ -1,7 +1,10 @@
+from __future__ import print_function
 import re, os, json, sys
 from urllib import quote_plus, unquote_plus
 from math import ceil, floor
-from urllib2 import urlopen
+from urllib2 import urlopen, HTTPError
+from time import sleep
+

 groupnamepat = re.compile(r"^g\.(\w+)\$")
 def splitpadname (padid):
@@ -39,11 +42,27 @@ def padpath2id (path):
     else:
         return p.decode("utf-8")

-def getjson (url):
-    f = urlopen(url)
-    data = f.read()
-    f.close()
-    return json.loads(data)
+def getjson (url, max_retry=3, retry_sleep_time=0.5):
+    ret = {}
+    ret["_retries"] = 0
+    while ret["_retries"] <= max_retry:
+        try:
+            f = urlopen(url)
+            data = f.read()
+            rurl = f.geturl()
+            f.close()
+            ret.update(json.loads(data))
+            ret["_code"] = f.getcode()
+            if rurl != url:
+                ret["_url"] = rurl
+            return ret
+        except HTTPError as e:
+            print ("HTTPError {0}".format(e), file=sys.stderr)
+            ret["_code"] = e.code
+            ret["_retries"]+=1
+            if retry_sleep_time:
+                sleep(retry_sleep_time)
+    return ret

 def loadpadinfo(p):
     with open(p) as f:
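
Note on the new getjson (an illustrative sketch, not part of the diff): instead of raising on HTTP errors, it now always returns a dict that carries its own bookkeeping ("_code", "_retries", and "_url" on redirects) next to the decoded API response, so callers can branch on the status. The apiurl/apikey names and the pad id below are invented placeholders:

# Sketch only: apiurl, apikey and the pad id are hypothetical placeholders
from urllib import urlencode

resp = getjson(apiurl + 'getText?' + urlencode({'apikey': apikey, 'padID': 'example-pad'}))
if resp.get("_code") == 200:
    text = resp['data']['text']          # normal Etherpad API payload
else:
    # after max_retry failed attempts only the bookkeeping keys are present
    print ("gave up after {0} retries (HTTP {1})".format(resp["_retries"], resp.get("_code")))
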
@@ -6,6 +6,8 @@ import json, os, re
 from urllib import urlencode
 from urllib2 import urlopen, HTTPError, URLError
 from jinja2 import FileSystemLoader, Environment
+from datetime import datetime
+

 def group (items, key=lambda x: x):
     ret = []
@@ -33,28 +35,38 @@ def main(args):
     tmpath = os.path.join(tmpath, "data", "templates")

     env = Environment(loader=FileSystemLoader(tmpath))
-    template = env.get_template("pad_index.html")
+    template = env.get_template("index.html")

+    def base (x):
+        return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x)
+
     inputs = args.input
     inputs.sort()
-    inputs = [x for x in inputs if os.path.isdir(x)]
+    inputs = group(inputs, base)

-    def base (x):
-        return re.sub(r"(\.html)|(\.diff\.html)|(\.meta\.json)|(\.txt)$", "", x)
+    def loadmeta(paths):
+        for p in paths:
+            if p.endswith(".meta.json"):
+                with open(p) as f:
+                    return json.load(f)
+
+    inputs = map(loadmeta, inputs)
+    # sort by last edited (reverse)
+    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    print (template.render({"timestamp": timestamp, "pads": inputs}).encode("utf-8"))

     # TODO: MODIFY THIS TO MAKE THE OUTPUT JOINABLE with the collected META DATA
     # evt: how can the metadata become a GRAPH structure!!! with each output DOCUMENT
     #
-    print ("<ol>")
-    for x in inputs:
-        padid = x
-        metapath = os.path.join(x, "{0}.meta.json".format(padid))
-        if os.path.exists(metapath):
-            print ("""<li><a href="{0}">{0}</a></li>""".format(x))
-            with open(metapath) as f:
-                meta = json.load(f)
-            indexpath = os.path.join(x, "index.html")
-            with open(indexpath, "w") as f:
-                print (template.render(**meta).encode("utf-8"), file=f)
+    # print ("<ol>")
+    # for x in inputs:
+    #     padid = x
+    #     metapath = os.path.join(x, "{0}.meta.json".format(padid))
+    #     if os.path.exists(metapath):
+    #         print ("""<li><a href="{0}">{0}</a></li>""".format(x))
+    #         with open(metapath) as f:
+    #             meta = json.load(f)
+    #         indexpath = os.path.join(x, "index.html")
+    #         with open(indexpath, "w") as f:

-    print ("</ol>")
+    # print ("</ol>")
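
For orientation, a sketch (assuming group() returns one list of paths per base name, which is how it is used here) of what the new index flow does with a set of hypothetical dump files: filenames are bucketed per pad via base(), and loadmeta() picks the .meta.json out of each bucket:

# Hypothetical filenames, illustrating the group()/base()/loadmeta() flow above
inputs = ["agenda.diff.html", "agenda.meta.json",
          "notes.meta.json", "notes.raw.html", "notes.raw.txt"]
inputs.sort()
inputs = group(inputs, base)
# roughly: [["agenda.diff.html", "agenda.meta.json"],
#           ["notes.meta.json", "notes.raw.html", "notes.raw.txt"]]
pads = map(loadmeta, inputs)   # one parsed .meta.json dict per pad
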
@@ -3,7 +3,7 @@ from __future__ import print_function
 from argparse import ArgumentParser
 import sys, json, re, os
 from datetime import datetime
-from urllib import urlencode
+from urllib import urlencode, quote
 from urllib2 import HTTPError
 from common import *
 from time import sleep
@@ -26,7 +26,7 @@ def main (args):
     p.add_argument("padid", nargs="*", default=[])
     p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
     p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
-    p.add_argument("--pub", default=".", help="folder to store files for public pads, default: pub")
+    p.add_argument("--pub", default="p", help="folder to store files for public pads, default: pub")
     p.add_argument("--group", default="g", help="folder to store files for group pads, default: g")
     p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
     p.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False")
@@ -34,7 +34,7 @@ def main (args):
     p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False")
     p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.dhtml, default: False")
     p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False")
-    p.add_argument("--folder", default=False, action="store_true", help="dump files to folder named PADID (meta, text, html, dhtml), default: False")
+    p.add_argument("--folder", default=False, action="store_true", help="dump files in a folder named PADID (meta, text, html, dhtml), default: False")
     p.add_argument("--output", default=False, action="store_true", help="output changed padids on stdout")
     p.add_argument("--force", default=False, action="store_true", help="reload, even if revisions count matches previous")
     args = p.parse_args(args)
@@ -66,10 +66,6 @@ storing enough information to reconstruct (or understand an error occurred)
         data['padID'] = padid.encode("utf-8")
         p = padpath(padid, args.pub, args.group)
         if args.folder:
-            try:
-                os.makedirs(p)
-            except OSError:
-                pass
             p = os.path.join(p, padid.encode("utf-8"))

         metapath = p + ".meta.json"
@@ -77,13 +73,14 @@ storing enough information to reconstruct (or understand an error occurred)
         tries = 1
         skip = False
         padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
+        meta = {}
         if type(padurlbase) == unicode:
             padurlbase = padurlbase.encode("utf-8")
         while True:
             try:
                 if os.path.exists(metapath):
                     with open(metapath) as f:
-                        meta = json.load(f)
+                        meta.update(json.load(f))
                     revisions = getjson(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
                     if meta['revisions'] == revisions and not args.force:
                         skip=True
@@ -91,11 +88,13 @@ storing enough information to reconstruct (or understand an error occurred)

                 ## TODO: OUTPUT TO DIRECTORIES with DATA EMBEDDED IN DOCUMENTS
                 ## (or else in surrounding meta data!!)
-                meta = {'padid': padid.encode("utf-8")}
-                # this should be less of a hack
-                # TODO TEST!!!
-                meta["padurl"] = padurlbase + padid.encode("utf-8")
+                meta['padid'] = padid.encode("utf-8")
+                versions = meta["versions"] = []
+                versions.append({
+                    "url": padurlbase + padid.encode("utf-8"),
+                    "type": "pad",
+                    "code": 200
+                })

                 if revisions == None:
                     meta['revisions'] = getjson(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
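
A small point about `versions = meta["versions"] = []` above (illustrative, not from the diff): both names are bound to the same list, so every later versions.append(...) in the download blocks also grows the list that ends up in the dumped meta:

# Minimal sketch of the aliasing used in the hunk above
meta = {}
versions = meta["versions"] = []              # one shared list, two names
versions.append({"type": "pad", "code": 200})
assert meta["versions"][0]["type"] == "pad"   # the append is visible through meta
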
@@ -137,48 +136,53 @@ storing enough information to reconstruct (or understand an error occurred)
             except OSError:
                 pass

+        # Process text, html, dhtml, all options
+        if args.all or args.html:
+            html = getjson(info['apiurl']+'getHTML?'+urlencode(data))
+            ver = {"type": "html"}
+            versions.append(ver)
+            ver["code"] = html["_code"]
+            if html["_code"] == 200:
+                html = html['data']['html']
+                ver["path"] = p+".raw.html"
+                ver["url"] = quote(ver["path"])
+                with open(ver["path"], "w") as f:
+                    f.write(html.encode("utf-8"))
+
+        if args.all or args.text:
+            text = getjson(info['apiurl']+'getText?'+urlencode(data))
+            ver = {"type": "text"}
+            versions.append(ver)
+            ver["code"] = text["_code"]
+            if text["_code"] == 200:
+                text = text['data']['text']
+                ver["path"] = p+".raw.txt"
+                ver["url"] = quote(ver["path"])
+                with open(ver["path"], "w") as f:
+                    f.write(text.encode("utf-8"))
+            # once the content is settled, compute a hash
+            # and link it in the metadata!
+
+        if args.all or args.dhtml:
+            data['startRev'] = "0"
+            html = getjson(info['apiurl']+'createDiffHTML?'+urlencode(data))
+            ver = {"type": "diffhtml"}
+            versions.append(ver)
+            ver["code"] = html["_code"]
+            if html["_code"] == 200:
+                html = html['data']['html']
+                ver["path"] = p+".diff.html"
+                ver["url"] = quote(ver["path"])
+                with open(ver["path"], "w") as f:
+                    f.write(html.encode("utf-8"))
+
+        # output meta
         if args.all or args.meta:
+            ver = {"type": "meta"}
+            versions.append(ver)
+            ver["path"] = metapath
+            ver["url"] = quote(metapath)
             with open(metapath, "w") as f:
                 json.dump(meta, f, indent=2)

-        # Process text, html, dhtml, all options
-        if args.all or args.text:
-            text = getjson(info['apiurl']+'getText?'+urlencode(data))
-            text = text['data']['text']
-            with open(p+".txt", "w") as f:
-                f.write(text.encode("utf-8"))
-            # once the content is settled, compute a hash
-            # and link it in the metadata!
-
-
-        if args.all or args.html:
-            html = getjson(info['apiurl']+'getHTML?'+urlencode(data))
-            html = html['data']['html']
-            with open(p+".html", "w") as f:
-                f.write(html.encode("utf-8"))
-
-        if args.all or args.dhtml:
-            tries = 0
-            skip = False
-            while not skip:
-                try:
-                    data['startRev'] = "0"
-                    html = getjson(info['apiurl']+'createDiffHTML?'+urlencode(data))
-                    html = html['data']['html']
-                    with open(p+".diff.html", "w") as f:
-                        f.write(html.encode("utf-8"))
-                    break
-                except HTTPError as e:
-                    print ("HTTPERROR {0}".format(e), file=sys.stderr)
-                    tries += 1
-                    if tries >= 5:
-                        print (" Too many errors, deleting .diff.html and skipping", file=sys.stderr)
-                        try:
-                            os.remove(p+".diff.html")
-                        except OSError:
-                            pass
-                        skip=True
-                    else:
-                        sleep(0.1)
-
     print("\n{0} pad(s) loaded".format(count), file=sys.stderr)
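
Taken together, the json.dump above now writes one record per downloaded artefact. A PADID.meta.json produced by this code might look roughly like the following (all values invented; further fields such as last-edited and author information are filled in elsewhere in pull):

# Hypothetical PADID.meta.json contents after this commit (values invented)
{
  "padid": "example-pad",
  "revisions": 12,
  "versions": [
    {"type": "pad",      "code": 200, "url": "https://pads.example.org/p/example-pad"},
    {"type": "html",     "code": 200, "path": "p/example-pad.raw.html",  "url": "p/example-pad.raw.html"},
    {"type": "text",     "code": 200, "path": "p/example-pad.raw.txt",   "url": "p/example-pad.raw.txt"},
    {"type": "diffhtml", "code": 200, "path": "p/example-pad.diff.html", "url": "p/example-pad.diff.html"},
    {"type": "meta",     "path": "p/example-pad.meta.json", "url": "p/example-pad.meta.json"}
  ]
}
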
@@ -2,23 +2,39 @@
 <html>
 <head>
 <meta charset="utf-8" />
-<title>{% block title %}{{title}}{% endblock %}</title>
+<title>etherdump</title>
 <link rel="stylesheet" type="text/css" href="{%block css %}styles.css{%endblock%}">
 {% block scripts %}
 <script src="jquery-latest.js"></script>
 <script src="jquery.tablesorter.min.js"></script>
-<script src="script.js"></script>
 {% endblock scripts %}
 </head>
 <body>
-{% block header %}<h1>{{title}}</h1>{% endblock %}
-{% block info %}<p class="info">Last updated {{timestamp}}</p>{% endblock %}
+{% block info %}<p class="info">This listing is updated automatically once daily. Last update {{timestamp}}.</p>{% endblock %}
 {% block namefilter %}
 <div id="namefilter">
 <input type="text" id="namefilterinput" value="" placeholder="name filter" autofocus >
+<button id="newpadbutton">go/start pad</button>
 </div>
 <script>

+var newpadbutton = document.getElementById("newpadbutton");
+newpadbutton.addEventListener("click", function (e) {
+    var elt = document.getElementById("namefilterinput"),
+        newpadname = elt.value,
+        padbase = document.querySelector("td.versions a").href;
+    newpadname = newpadname.replace(/^\s*/g, "");
+    newpadname = newpadname.replace(/\s*$/g, "");
+    elt.value = newpadname;
+    padbase = padbase.replace(/\/[^/]+$/, "");
+    if (!newpadname) { alert("type the pad name, then click 'go'")}
+    else {
+        var newpadhref = padbase + "/" + encodeURIComponent(newpadname);
+        // console.log("goto", newpadhref);
+        window.location = newpadhref;
+    };
+    e.preventDefault();
+})
 var namefilter = (function (opts) {
     var timeout_id = null,
         filter_value = '',
@@ -27,7 +43,7 @@ var namefilter = (function (opts) {
         // console.log("update", filter_value);
         var pat = new RegExp(filter_value, "i");
         $("tbody tr").each(function () {
-            var n = $(".pad_name", this).text();
+            var n = $(".name", this).text();
             // console.log("n", n);
             if (filter_value == "" || n.match(pat) !== null) {
                 $(this).show();
@@ -51,7 +67,7 @@ $("#namefilterinput").bind("keyup", function (e) { namefilter($(this).val()); })

 $(document).ready(function()
 {
-    $("table.listing").tablesorter();
+    $("table.listing").tablesorter({sortList: [[2,1]]});
 }
 );

@@ -61,24 +77,32 @@ $(document).ready(function()
 <table class="listing tablesorter">
 <thead>
 <tr>
-<th>link</th>
-<th>pad name (click to view archived page)</th>
-<th>group</th>
+<th>name</th>
+<th>versions</th>
 <th>last edited</th>
-<th>size</th>
+<th>revisions</th>
+<th>authors</th>
 </tr>
 </thead>
 <tbody>
 {% for pad in pads %}
 <tr>
-<td class="pad_url"><a class="edit" href="{{ pad.url }}">edit</a></td>
-<td class="pad_name"><a href="{{ pad.html_path }}">{{ pad.pad_name }}</a></td>
-<td class="pad_group">{{ pad.group_name|default(pad.group_id) }}</td>
-<td class="pad_last_edited">{{ pad.last_edited_str }}</td>
-<td class="pad_size">{{ pad.text_length_human }}</td>
+<td class="name">
+    {{ pad.padid }}
+</td>
+<td class="versions">
+    {% for v in pad.versions %}<a href="{{v.url}}">{{v.type}}</a> {% endfor %}
+</td>
+<td class="lastedited">{{ pad.lastedited_iso|replace("T", " ") }}</td>
+<td class="revisions">{{ pad.revisions }}</td>
+<td class="authors">{{ pad.author_ids|length }}</td>
 </tr>
 {% endfor %}
 </tbody>
 </table>
+
+<div id="footer">
+<a href="index">index of all pads (password required)</a>
+</div>
 </body>
 </html>
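
Finally, a sketch (invented values; template directory path assumed from the index command above) of how the reworked template and the pull metadata meet: index renders index.html with a timestamp plus the loaded .meta.json dicts, and the table reads padid, versions, lastedited_iso, revisions and author_ids from each pad record:

# Illustrative only: one invented pad record rendered through the new index.html
from datetime import datetime
from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader("data/templates"))   # assumed template path
template = env.get_template("index.html")
pads = [{
    "padid": "example-pad",
    "revisions": 12,
    "lastedited_iso": "2015-11-01T12:34:56",
    "author_ids": ["a.x1", "a.y2"],
    "versions": [{"type": "text", "url": "p/example-pad.raw.txt"}],
}]
print (template.render({
    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "pads": pads,
}).encode("utf-8"))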