make file friendliness
This commit is contained in:
parent
5a14737737
commit
d21cc4b21e
@ -43,3 +43,11 @@ subcommands
|
|||||||
To get help on a subcommand:
|
To get help on a subcommand:
|
||||||
|
|
||||||
etherdump revisionscount --help
|
etherdump revisionscount --help
|
||||||
|
|
||||||
|
TODO
|
||||||
|
--------
|
||||||
|
* Modify tools to work with make
|
||||||
|
** Sync command
|
||||||
|
** Dump command that works on a single page
|
||||||
|
** Post processing as separable filters (such as linkify)
|
||||||
|
* Support for migrating (what dump formats exist that would allow pushing to another instance?)
|
||||||
|
25
etherdump/commands/common.py
Normal file
25
etherdump/commands/common.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
import re, os
|
||||||
|
from urllib import quote_plus, unquote_plus
|
||||||
|
|
||||||
|
|
||||||
|
# Group pads are named "g.<groupid>$<padname>"; the pattern captures <groupid>.
groupnamepat = re.compile(r"^g\.(\w+)\$")


def splitpadname (padid):
    """
    Split a pad id into (groupid, padname).

    For ids of the form "g.<groupid>$<padname>" the group id and the part
    after the "$" are returned. Any other id is returned unchanged as the
    pad name, paired with an empty group id.
    """
    match = groupnamepat.match(padid)
    if match is None:
        return (u"", padid)
    groupid = match.group(1)
    padname = padid[match.end():]
    return (groupid, padname)
|
||||||
|
|
||||||
|
def padpath (padid, pub_path=u"", group_path=u""):
    """
    Return the output path (without extension) for a pad.

    padid: pad id; group pads have the form "g.<groupid>$<padname>"
    pub_path: directory used for pads that are not in a group
    group_path: directory under which each group gets its own sub-directory

    Returns os.path.join(group_path, <groupid>, <name>) for group pads and
    os.path.join(pub_path, <name>) for all others, where <name> is the pad
    name after quote_plus escaping.
    """
    g, p = splitpadname(padid)
    # Work on UTF-8 byte strings (Python 2) before quoting and joining.
    # isinstance, unlike comparing type() for equality, also accepts
    # subclasses of unicode.
    if isinstance(g, unicode):
        g = g.encode("utf-8")
    if isinstance(p, unicode):
        p = p.encode("utf-8")
    p = quote_plus(p)
    if g:
        return os.path.join(group_path, g, p)
    return os.path.join(pub_path, p)
|
38
etherdump/commands/creatediffhtml.py
Normal file
38
etherdump/commands/creatediffhtml.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
import json
|
||||||
|
from urllib import urlencode
|
||||||
|
from urllib2 import urlopen, HTTPError, URLError
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """
    Print the HTML diff of a pad as returned by the createDiffHTML API call.

    args: list of command line arguments (see the options defined below)

    Connection settings are read from the --padinfo JSON file. With
    --showurl the request URL is printed and no request is made.
    When the server answers with an HTTP error, a message is written to
    stderr and the process exits with status 1.
    """
    import sys  # local import: this module has no file-level import of sys

    p = ArgumentParser("")
    p.add_argument("padid", help="the padid")
    p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
    p.add_argument("--showurl", default=False, action="store_true")
    p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
    p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
    data = {}
    data['apikey'] = info['apikey']
    data['padID'] = args.padid
    data['startRev'] = "0"
    if args.rev is not None:
        # createDiffHTML is documented as (padID, startRev, endRev); the
        # requested revision is the end of the diffed range.
        data['endRev'] = args.rev
    requesturl = apiurl+'createDiffHTML?'+urlencode(data)

    if args.showurl:
        print(requesturl)
        return

    try:
        results = json.load(urlopen(requesturl))['data']
    except HTTPError as e:
        # Report the failure instead of silently printing nothing, so a
        # calling Makefile sees a non-zero exit status.
        sys.stderr.write("createDiffHTML request failed: HTTP {0}\n".format(e.code))
        sys.exit(1)

    if args.format == "json":
        print(json.dumps(results))
    else:
        print(results['html'].encode("utf-8"))
|
@ -6,6 +6,7 @@ from datetime import datetime
|
|||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
from urllib2 import urlopen, HTTPError, URLError
|
from urllib2 import urlopen, HTTPError, URLError
|
||||||
from csv import writer
|
from csv import writer
|
||||||
|
from math import ceil, floor
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Dumps a CSV of all pads with columns
|
Dumps a CSV of all pads with columns
|
||||||
@ -32,6 +33,7 @@ def main (args):
|
|||||||
p = ArgumentParser("")
|
p = ArgumentParser("")
|
||||||
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
|
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
|
||||||
p.add_argument("--format", default="csv", help="output format: csv (default), json")
|
p.add_argument("--format", default="csv", help="output format: csv (default), json")
|
||||||
|
p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False")
|
||||||
args = p.parse_args(args)
|
args = p.parse_args(args)
|
||||||
|
|
||||||
with open(args.padinfo) as f:
|
with open(args.padinfo) as f:
|
||||||
@ -41,11 +43,23 @@ def main (args):
|
|||||||
data['apikey'] = info['apikey']
|
data['apikey'] = info['apikey']
|
||||||
requesturl = apiurl+'listAllPads?'+urlencode(data)
|
requesturl = apiurl+'listAllPads?'+urlencode(data)
|
||||||
|
|
||||||
results = jsonload(requesturl)['data']['padIDs']
|
padids = jsonload(requesturl)['data']['padIDs']
|
||||||
results.sort()
|
padids.sort()
|
||||||
|
numpads = len(padids)
|
||||||
|
maxmsglen = 0
|
||||||
|
count = 0
|
||||||
out.writerow(("padid", "groupid", "lastedited", "revisions", "author_ids"))
|
out.writerow(("padid", "groupid", "lastedited", "revisions", "author_ids"))
|
||||||
for padid in results:
|
for i, padid in enumerate(padids):
|
||||||
print (u"{0}".format(padid), file=sys.stderr)
|
p = (float(i) / numpads)
|
||||||
|
percentage = int(floor(p*100))
|
||||||
|
bars = int(ceil(p*20))
|
||||||
|
bar = ("*"*bars) + ("-"*(20-bars))
|
||||||
|
msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid)
|
||||||
|
if len(msg) > maxmsglen:
|
||||||
|
maxmsglen = len(msg)
|
||||||
|
sys.stderr.write("\r{0}".format(" "*maxmsglen))
|
||||||
|
sys.stderr.write(msg.encode("utf-8"))
|
||||||
|
sys.stderr.flush()
|
||||||
m = groupnamepat.match(padid)
|
m = groupnamepat.match(padid)
|
||||||
if m:
|
if m:
|
||||||
groupname = m.group(1)
|
groupname = m.group(1)
|
||||||
@ -56,10 +70,16 @@ def main (args):
|
|||||||
|
|
||||||
data['padID'] = padid.encode("utf-8")
|
data['padID'] = padid.encode("utf-8")
|
||||||
revisions = jsonload(apiurl+'getRevisionsCount?'+urlencode(data))['data']['revisions']
|
revisions = jsonload(apiurl+'getRevisionsCount?'+urlencode(data))['data']['revisions']
|
||||||
|
if (revisions == 0) and not args.zerorevs:
|
||||||
|
continue
|
||||||
|
|
||||||
|
|
||||||
lastedited_raw = jsonload(apiurl+'getLastEdited?'+urlencode(data))['data']['lastEdited']
|
lastedited_raw = jsonload(apiurl+'getLastEdited?'+urlencode(data))['data']['lastEdited']
|
||||||
lastedited_iso = datetime.fromtimestamp(int(lastedited_raw)/1000).isoformat()
|
lastedited_iso = datetime.fromtimestamp(int(lastedited_raw)/1000).isoformat()
|
||||||
author_ids = jsonload(apiurl+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
|
author_ids = jsonload(apiurl+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
|
||||||
author_ids = u" ".join(author_ids).encode("utf-8")
|
author_ids = u" ".join(author_ids).encode("utf-8")
|
||||||
out.writerow((padidnogroup.encode("utf-8"), groupname.encode("utf-8"), revisions, lastedited_iso, author_ids))
|
out.writerow((padidnogroup.encode("utf-8"), groupname.encode("utf-8"), revisions, lastedited_iso, author_ids))
|
||||||
|
count += 1
|
||||||
|
|
||||||
|
print("\nWrote {0} rows...".format(count), file=sys.stderr)
|
||||||
|
|
||||||
|
34
etherdump/commands/gethtml.py
Normal file
34
etherdump/commands/gethtml.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
import json
|
||||||
|
from urllib import urlencode
|
||||||
|
from urllib2 import urlopen, HTTPError, URLError
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """
    Print the HTML of a pad as returned by the getHTML API call.

    args: list of command line arguments (see the options defined below)

    Connection settings are read from the --padinfo JSON file. With
    --showurl the request URL is printed and no request is made.
    With --format json the whole "data" object of the response is printed,
    otherwise only its "html" field (UTF-8 encoded).
    """
    p = ArgumentParser("")
    p.add_argument("padid", help="the padid")
    p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
    p.add_argument("--showurl", default=False, action="store_true")
    p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
    p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
    data = {}
    data['apikey'] = info['apikey']
    data['padID'] = args.padid
    if args.rev is not None:
        data['rev'] = args.rev
    requesturl = apiurl+'getHTML?'+urlencode(data)

    if args.showurl:
        print(requesturl)
        return

    response = urlopen(requesturl)
    try:
        results = json.load(response)['data']
    finally:
        # always release the connection, even if the body is not valid JSON
        response.close()

    if args.format == "json":
        print(json.dumps(results))
    else:
        print(results['html'].encode("utf-8"))
|
@ -12,6 +12,7 @@ def main(args):
|
|||||||
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
|
p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
|
||||||
p.add_argument("--showurl", default=False, action="store_true")
|
p.add_argument("--showurl", default=False, action="store_true")
|
||||||
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
|
p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
|
||||||
|
p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
|
||||||
args = p.parse_args(args)
|
args = p.parse_args(args)
|
||||||
|
|
||||||
with open(args.padinfo) as f:
|
with open(args.padinfo) as f:
|
||||||
@ -19,7 +20,9 @@ def main(args):
|
|||||||
apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
|
apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
|
||||||
data = {}
|
data = {}
|
||||||
data['apikey'] = info['apikey']
|
data['apikey'] = info['apikey']
|
||||||
data['padID'] = args.padid.encode("utf-8")
|
data['padID'] = args.padid # is utf-8 encoded
|
||||||
|
if args.rev != None:
|
||||||
|
data['rev'] = args.rev
|
||||||
requesturl = apiurl+'getText?'+urlencode(data)
|
requesturl = apiurl+'getText?'+urlencode(data)
|
||||||
if args.showurl:
|
if args.showurl:
|
||||||
print requesturl
|
print requesturl
|
||||||
@ -29,6 +32,3 @@ def main(args):
|
|||||||
print json.dumps(results)
|
print json.dumps(results)
|
||||||
else:
|
else:
|
||||||
print results['text'].encode("utf-8")
|
print results['text'].encode("utf-8")
|
||||||
|
|
||||||
# To save to file run:
|
|
||||||
# python gettext.py > copy.txt
|
|
32
etherdump/commands/showmeta.py
Normal file
32
etherdump/commands/showmeta.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
from __future__ import print_function
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
import json, sys, re
|
||||||
|
from common import *
|
||||||
|
|
||||||
|
"""
|
||||||
|
Extract and output selected fields of metadata
|
||||||
|
"""
|
||||||
|
|
||||||
|
def main (args):
    """
    Print selected fields of a pad's metadata file.

    args: list of command line arguments (see the options defined below)

    The metadata is read from --path, or from padpath(<padid>) + ".meta.json"
    when only --padid is given. Each {name} placeholder in --format is
    replaced by the value stored under that key in the metadata.
    Exits with status -1 when neither --path nor --padid is given.
    """
    p = ArgumentParser("")
    p.add_argument("--path", default=None, help="read from a meta.json file")
    p.add_argument("--padid", default=None, help="read meta for this padid")
    p.add_argument("--format", default="{padid}", help="format str, default: {padid}")
    args = p.parse_args(args)

    path = args.path
    if not path and args.padid:
        path = padpath(args.padid) + ".meta.json"

    if not path:
        # usage errors go to stderr so they never mix with the formatted output
        print ("Must specify either --path or --padid", file=sys.stderr)
        sys.exit(-1)

    with open(path) as f:
        meta = json.load(f)

    formatstr = args.format.decode("utf-8")
    # Rewrite "{key}" into "{0[key]}" so str.format looks the key up in meta.
    formatstr = re.sub(u"{(\\w+)}", r"{0[\1]}", formatstr)
    print (formatstr.format(meta).encode("utf-8"))
|
||||||
|
|
96
etherdump/commands/sync.py
Normal file
96
etherdump/commands/sync.py
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
from __future__ import print_function
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
import sys, json, re, os
|
||||||
|
from datetime import datetime
|
||||||
|
from urllib import urlencode
|
||||||
|
from urllib2 import urlopen, HTTPError, URLError
|
||||||
|
from math import ceil, floor
|
||||||
|
from common import *
|
||||||
|
|
||||||
|
"""
|
||||||
|
sync(meta):
|
||||||
|
Update the metadata files of pads that have changed.
|
||||||
|
Check for changed pads by comparing each pad's current revision count with the count stored in its existing .meta.json file.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def jsonload (url):
    """
    Fetch url and return the response body parsed as JSON.

    The response is closed even when reading it fails.
    """
    f = urlopen(url)
    try:
        data = f.read()
    finally:
        f.close()
    return json.loads(data)
|
||||||
|
|
||||||
|
def load_padinfo(p):
    """
    Load the padinfo JSON file at path p and return it as a dict.

    An extra 'api' entry is added holding the base URL of the API, built
    from the protocol, hostname, port, apiurl and apiversion entries.
    """
    with open(p) as handle:
        settings = json.load(handle)
    template = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/"
    settings['api'] = template.format(settings)
    return settings
|
||||||
|
|
||||||
|
|
||||||
|
def main (args):
    """
    Write a <pad>.meta.json file for every pad that is new or has changed.

    args: list of command line arguments (see the options defined below)

    All pad ids are listed from the instance described by --padinfo. For
    each pad the current revision count is compared with the count stored
    in its existing meta file, if there is one; pads with an unchanged
    count are skipped, as are pads with zero revisions unless --zerorevs is
    given. Progress and the final file count are written to stderr.
    """
    parser = ArgumentParser("")
    parser.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
    parser.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False")
    parser.add_argument("--pub", default="pub", help="pub path for output, default: pub")
    parser.add_argument("--group", default="g", help="group path for output, default: g")
    parser.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
    args = parser.parse_args(args)

    info = load_padinfo(args.padinfo)
    data = {}
    data['apikey'] = info['apikey']
    padids = jsonload(info['api']+'listAllPads?'+urlencode(data))['data']['padIDs']
    padids.sort()
    numpads = len(padids)
    maxmsglen = 0
    count = 0
    for i, padid in enumerate(padids):
        if args.skip is not None and i < args.skip:
            continue

        # 20 character progress bar, redrawn in place on stderr
        bars = int(ceil((float(i) / numpads) * 20))
        bar = ("*"*bars) + ("-"*(20-bars))
        msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid)
        if len(msg) > maxmsglen:
            maxmsglen = len(msg)
        # blank out the longest line written so far before redrawing
        sys.stderr.write("\r{0}".format(" "*maxmsglen))
        sys.stderr.write(msg.encode("utf-8"))
        sys.stderr.flush()

        data['padID'] = padid.encode("utf-8")
        metapath = padpath(padid, args.pub, args.group) + ".meta.json"

        previous = None
        if os.path.exists(metapath):
            with open(metapath) as f:
                previous = json.load(f)

        revisions = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
        # .get: a meta file without a revision count is treated as outdated
        if previous is not None and previous.get('revisions') == revisions:
            continue
        if (revisions == 0) and (not args.zerorevs):
            continue

        count += 1
        meta = {'padid': padid.encode("utf-8"), 'revisions': revisions}
        # todo: load more metadata!
        meta['lastedited_raw'] = int(jsonload(info['api']+'getLastEdited?'+urlencode(data))['data']['lastEdited'])
        # lastEdited is divided by 1000 before being passed to fromtimestamp
        meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat()
        meta['author_ids'] = jsonload(info['api'] +'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']

        # save it; only create the directory when it is missing, so any
        # other failure is raised instead of being swallowed
        outdir = os.path.dirname(metapath)
        if outdir and not os.path.isdir(outdir):
            os.makedirs(outdir)
        with open(metapath, "w") as f:
            json.dump(meta, f)

    print("\nWrote {0} files...".format(count), file=sys.stderr)
|
Loading…
Reference in New Issue
Block a user