This commit is contained in:
Michael Murtaugh 2015-12-04 17:17:32 +01:00
parent 568a8f0790
commit 8d5ebd6f01
5 changed files with 257 additions and 5 deletions

View File

@ -0,0 +1,23 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json, os
def main(args):
    """Join several JSON files into one JSON array printed on stdout.

    args is the list of command line arguments: one or more input
    filenames plus an optional --indent N (default 2). The files are read
    in sorted filename order and each parsed document becomes one element
    of the output array.
    """
    parser = ArgumentParser("")
    parser.add_argument("input", nargs="+", help="filenames")
    parser.add_argument("--indent", type=int, default=2, help="indent")
    args = parser.parse_args(args)

    filenames = args.input
    filenames.sort()

    documents = []
    for filename in filenames:
        with open(filename) as handle:
            documents.append(json.load(handle))

    # An indent of 0 is falsy and selects json.dumps' compact default form.
    dump_options = {"indent": args.indent} if args.indent else {}
    print(json.dumps(documents, **dump_options))

View File

@ -11,6 +11,9 @@ def splitpadname (padid):
else: else:
return (u"", padid) return (u"", padid)
def padurl(padid):
    """Return the URL for padid; for now this simply hands the id back unchanged."""
    return padid
def padpath (padid, pub_path=u"", group_path=u""): def padpath (padid, pub_path=u"", group_path=u""):
g, p = splitpadname(padid) g, p = splitpadname(padid)
if type(g) == unicode: if type(g) == unicode:

View File

@ -0,0 +1,60 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json, os, re
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from jinja2 import FileSystemLoader, Environment
def group(items, key=lambda x: x):
    """Partition items into sorted groups, ordered by their grouping key.

    key is called on every item; items with equal keys end up in the same
    group. Returns a list of lists: groups appear in ascending key order and
    the items inside each group are sorted as well.
    """
    buckets = {}
    for entry in items:
        buckets.setdefault(key(entry), []).append(entry)
    return [sorted(buckets[name]) for name in sorted(buckets)]
def main(args):
    """Render an index.html into every pad folder and list the folders as HTML.

    args is the list of command line arguments: one or more paths plus an
    optional --templates PATH. Paths that are not directories are ignored.
    For each remaining folder that contains "<padid>.meta.json", the
    metadata is loaded, the "pad_index.html" template is rendered with it
    into "<folder>/index.html", and an <li> linking to the folder is printed
    on stdout (wrapped in a single <ol>).
    """
    p = ArgumentParser("")
    p.add_argument("input", nargs="+", help="filenames")
    p.add_argument("--templates", default=None, help="templates path")
    args = p.parse_args(args)

    tmpath = args.templates
    if tmpath is None:
        # Default location: <parent of this file's directory>/data/templates
        here = os.path.dirname(os.path.abspath(__file__))
        tmpath = os.path.join(os.path.dirname(here), "data", "templates")

    env = Environment(loader=FileSystemLoader(tmpath))
    template = env.get_template("pad_index.html")

    inputs = args.input
    inputs.sort()
    inputs = [x for x in inputs if os.path.isdir(x)]

    def base(x):
        # Strip one known output suffix from the END of a filename. The
        # alternatives are grouped so that "$" anchors all of them, and the
        # longer ".diff.html" is tried before ".html".
        # NOTE(review): not called anywhere in this function yet.
        return re.sub(r"(\.diff\.html|\.meta\.json|\.html|\.txt)$", "", x)

    # TODO: MODIFY THIS TO MAKE THE OUTPUT JOINABLE with the collected META DATA
    # evt: how can the metadata become a GRAPH structure!!! with each output DOCUMENT
    #
    print("<ol>")
    for x in inputs:
        # The folder may be passed with a trailing slash or with parent
        # directories ("out/mypad/"); the metadata file is named after the
        # last path component only.
        # NOTE(review): assumes the folder name equals the pad id -- confirm
        # against how the folders are created.
        padid = os.path.basename(os.path.normpath(x))
        metapath = os.path.join(x, "{0}.meta.json".format(padid))
        if not os.path.exists(metapath):
            continue
        print("""<li><a href="{0}">{0}</a></li>""".format(x))
        with open(metapath) as f:
            meta = json.load(f)
        indexpath = os.path.join(x, "index.html")
        with open(indexpath, "w") as f:
            print(template.render(**meta).encode("utf-8"), file=f)
    print("</ol>")

View File

@ -14,6 +14,11 @@ pull(meta):
Update meta data files for those that have changed. Update meta data files for those that have changed.
Check for changed pads by looking at revisions & comparing to existing Check for changed pads by looking at revisions & comparing to existing
todo...
use/prefer public interfaces ? (export functions)
""" """
def main (args): def main (args):
@ -29,6 +34,9 @@ def main (args):
p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False") p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False")
p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.dhtml, default: False") p.add_argument("--dhtml", default=False, action="store_true", help="download dhtml to PADID.dhtml, default: False")
p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False") p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False")
p.add_argument("--folder", default=False, action="store_true", help="dump files to folder named PADID (meta, text, html, dhtml), default: False")
p.add_argument("--output", default=False, action="store_true", help="output changed padids on stdout")
p.add_argument("--force", default=False, action="store_true", help="reload, even if revisions count matches previous")
args = p.parse_args(args) args = p.parse_args(args)
info = loadpadinfo(args.padinfo) info = loadpadinfo(args.padinfo)
@ -44,27 +52,51 @@ def main (args):
# maxmsglen = 0 # maxmsglen = 0
count = 0 count = 0
for i, padid in enumerate(padids): for i, padid in enumerate(padids):
# TODO...
"""
Self-contained documents / and/or document receipts
storing enough information to reconstruct (or understand an error occurred)
"""
if args.skip != None and i<args.skip: if args.skip != None and i<args.skip:
continue continue
progressbar(i, numpads, padid) progressbar(i, numpads, padid)
data['padID'] = padid.encode("utf-8") data['padID'] = padid.encode("utf-8")
p = padpath(padid, args.pub, args.group) p = padpath(padid, args.pub, args.group)
if args.folder:
try:
os.makedirs(p)
except OSError:
pass
p = os.path.join(p, padid.encode("utf-8"))
metapath = p + ".meta.json" metapath = p + ".meta.json"
revisions = None revisions = None
tries = 1 tries = 1
skip = False skip = False
padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
if type(padurlbase) == unicode:
padurlbase = padurlbase.encode("utf-8")
while True: while True:
try: try:
if os.path.exists(metapath): if os.path.exists(metapath):
with open(metapath) as f: with open(metapath) as f:
meta = json.load(f) meta = json.load(f)
revisions = getjson(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] revisions = getjson(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
if meta['revisions'] == revisions: if meta['revisions'] == revisions and not args.force:
skip=True skip=True
break break
## TODO: OUTPUT TO DIRECTORIES with DATA EMBEDDED IN DOCUMENTS
## (or else in surrounding meta data!!)
meta = {'padid': padid.encode("utf-8")} meta = {'padid': padid.encode("utf-8")}
# this should be less of a hack
# TODO TEST!!!
meta["padurl"] = padurlbase + padid.encode("utf-8")
if revisions == None: if revisions == None:
meta['revisions'] = getjson(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions'] meta['revisions'] = getjson(info['apiurl']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
else: else:
@ -76,7 +108,7 @@ def main (args):
break break
# todo: load more metadata! # todo: load more metadata!
meta['pad'], meta['group'] = splitpadname(padid) meta['group'], meta['pad'] = splitpadname(padid)
meta['pathbase'] = p meta['pathbase'] = p
meta['lastedited_raw'] = int(getjson(info['apiurl']+'getLastEdited?'+urlencode(data))['data']['lastEdited']) meta['lastedited_raw'] = int(getjson(info['apiurl']+'getLastEdited?'+urlencode(data))['data']['lastEdited'])
meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat() meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat()
@ -88,13 +120,16 @@ def main (args):
print ("Too many failures ({0}), skipping".format(padid).encode("utf-8"), file=sys.stderr) print ("Too many failures ({0}), skipping".format(padid).encode("utf-8"), file=sys.stderr)
skip=True skip=True
break break
else:
sleep(3)
if skip: if skip:
continue continue
count += 1 count += 1
print (padid.encode("utf-8")) if args.output:
print (padid.encode("utf-8"))
if args.all or (args.meta or args.text or args.html or args.dhtml): if args.all or (args.meta or args.text or args.html or args.dhtml):
try: try:
@ -104,7 +139,7 @@ def main (args):
if args.all or args.meta: if args.all or args.meta:
with open(metapath, "w") as f: with open(metapath, "w") as f:
json.dump(meta, f) json.dump(meta, f, indent=2)
# Process text, html, dhtml, all options # Process text, html, dhtml, all options
if args.all or args.text: if args.all or args.text:
@ -112,6 +147,9 @@ def main (args):
text = text['data']['text'] text = text['data']['text']
with open(p+".txt", "w") as f: with open(p+".txt", "w") as f:
f.write(text.encode("utf-8")) f.write(text.encode("utf-8"))
# once the content is settled, compute a hash
# and link it in the metadata!
if args.all or args.html: if args.all or args.html:
html = getjson(info['apiurl']+'getHTML?'+urlencode(data)) html = getjson(info['apiurl']+'getHTML?'+urlencode(data))
@ -143,4 +181,4 @@ def main (args):
else: else:
sleep(0.1) sleep(0.1)
print("\n{0} pad(s) changed".format(count), file=sys.stderr) print("\n{0} pad(s) loaded".format(count), file=sys.stderr)

View File

@ -0,0 +1,128 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style>
body {
overflow: hidden;
margin: 0;
padding: 0;
}
td {
vertical-align: top;
}
table ul {
margin: 0;
padding-left: 0;
list-style: none;
}
#content {
position: absolute;
left:0;
top: 0;
right: 0;
bottom: 0;
overflow: auto;
background: gray;
}
iframe {
position: absolute;
left: 0; top: 0;
width: 100%;
height: 100%;
border: none;
margin: 0;
padding: 0;
background: white;
}
#overlay {
position: absolute;
z-index: 10;
right: 20px;
top: 40px;
width: auto;
height: auto;
overflow: hidden;
background: black;
color: white;
padding: 10px;
font-size: 10px;
font-family: monospace;
}
#overlay a {
color: white;
text-decoration: none;
}
#overlay a:hover {
background: gray;
color: black;
}
a.active {
background: white !important;
color: black !important;
}
td.key {
font-style: italic;
}
#overlay.open {}
#overlay .closedcontent { display: block; }
#overlay.open .closedcontent { display: none; }
#overlay .opencontent { display: none; }
#overlay.open .opencontent { display: block; }
</style>
</head>
<body>
<div id="content">
<iframe src="{{padid}}.html" id="frame" name="frame"></iframe>
<div id="overlay">
<div class="closedcontent">
versions
</div>
<table class="opencontent">
<tr><td class="key">padid</td><td>{{padid}}</td></tr>
<tr><td class="key">lastedited</td><td>{{lastedited_iso}}</td></tr>
<tr><td class="key">revisions</td><td>{{revisions}}</td></tr>
<tr>
<td class="key">versions</td>
<td>
<ul>
<li><a href="{{padurl}}" target="frame">Etherpad (editable)</a></li>
<li><a href="{{padid}}.html" target="frame">HTML</a></li>
<li><a href="{{padid}}.txt" target="frame">plain text</a></li>
<li><a href="{{padid}}.diff.html" target="frame">HTML with authorship colors</a></li>
<li><a href="{{padid}}.meta.json" target="frame">Meta data (JSON)</a></li>
</ul>
</td>
</tr>
</table>
</div>
<script>
(function () {
    // Overlay behaviour for the pad index page:
    //  * highlight (class "active") the overlay link whose target is
    //    currently shown in the iframe;
    //  * expand the overlay while the mouse is over it (class "open").
    var frame = document.getElementById("frame"),
        overlay = document.getElementById("overlay"),
        links = document.querySelectorAll("#overlay a"),
        last_clicked_href = null,
        i, len;

    // Remember the overlay link that was followed last. This is the only
    // way to know what a cross-origin frame is showing (see load handler).
    for (i = 0, len = links.length; i < len; i++) {
        links[i].addEventListener("click", function () {
            last_clicked_href = this.href;
        }, false);
    }

    frame.addEventListener("load", function () {
        var loaded_href;
        try {
            loaded_href = frame.contentDocument.location.href;
        } catch (e) {
            // contentDocument is null when the frame shows a document from
            // another origin (e.g. the editable Etherpad link), so reading
            // .location throws; fall back to the last clicked link.
            loaded_href = last_clicked_href;
        }
        // console.log("load", loaded_href);
        for (var i = 0, len = links.length; i < len; i++) {
            var linkhref = links[i].href;
            // console.log("*", linkhref);
            if (linkhref == loaded_href) {
                links[i].classList.add("active");
            } else {
                links[i].classList.remove("active");
            }
        }
    });

    overlay.addEventListener("mouseenter", function () {
        overlay.classList.add("open");
    }, false);
    overlay.addEventListener("mouseleave", function () {
        overlay.classList.remove("open");
    }, false);
})()
</script>
</body>
</html>