
Commit: make file friendliness
Branch: add-quote-import
Michael Murtaugh, 9 years ago
Parent commit: d21cc4b21e
Changed files (lines changed):

  1. README.md (8)
  2. etherdump/commands/common.py (25)
  3. etherdump/commands/creatediffhtml.py (38)
  4. etherdump/commands/dumpcsv.py (28)
  5. etherdump/commands/gethtml.py (34)
  6. etherdump/commands/gettext.py (8)
  7. etherdump/commands/showmeta.py (32)
  8. etherdump/commands/sync.py (96)

README.md (8 lines changed)

@@ -43,3 +43,11 @@ subcommands
 To get help on a subcommand:
     etherdump revisionscount --help
+
+TODO
+--------
+* Modify tools to work with make
+** Sync command
+** Dump command that works on a single page
+** Post processing as separable filters (such as linkify)
+* Support for migrating (what dump formats exist that would allow pushing to another instance?)
 

etherdump/commands/common.py (new file, 25 lines)

@@ -0,0 +1,25 @@
import re, os
from urllib import quote_plus, unquote_plus

groupnamepat = re.compile(r"^g\.(\w+)\$")

def splitpadname (padid):
    m = groupnamepat.match(padid)
    if m:
        return (m.group(1), padid[m.end():])
    else:
        return (u"", padid)

def padpath (padid, pub_path=u"", group_path=u""):
    g, p = splitpadname(padid)
    if type(g) == unicode:
        g = g.encode("utf-8")
    if type(p) == unicode:
        p = p.encode("utf-8")
    p = quote_plus(p)
    # p = p.replace(" ", "_")
    # p = p.replace("*", "-")
    if g:
        return os.path.join(group_path, g, p)
    else:
        return os.path.join(pub_path, p)
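
To make the two helpers concrete, a small usage sketch (the pad names are invented for the example, not from the source):

    # splitpadname peels off an Etherpad group prefix like "g.Xa3f$"
    print splitpadname("g.Xa3f$notes")    # ('Xa3f', 'notes')
    print splitpadname("notes")           # (u'', 'notes')
    # padpath turns a padid into a filesystem path, quoting unsafe characters
    print padpath("g.Xa3f$my pad", u"pub", u"g")   # g/Xa3f/my+pad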

etherdump/commands/creatediffhtml.py (new file, 38 lines)

@@ -0,0 +1,38 @@
#!/usr/bin/env python
from argparse import ArgumentParser
import json
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError

def main(args):
    p = ArgumentParser("")
    p.add_argument("padid", help="the padid")
    p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
    p.add_argument("--showurl", default=False, action="store_true")
    p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
    p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
    data = {}
    data['apikey'] = info['apikey']
    data['padID'] = args.padid
    data['startRev'] = "0"
    if args.rev != None:
        data['rev'] = args.rev
    requesturl = apiurl+'createDiffHTML?'+urlencode(data)
    if args.showurl:
        print requesturl
    else:
        try:
            results = json.load(urlopen(requesturl))['data']
            if args.format == "json":
                print json.dumps(results)
            else:
                print results['html'].encode("utf-8")
        except HTTPError as e:
            pass
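
All of these commands read connection settings from a padinfo.json file. Its keys follow directly from the apiurl format string above plus the apikey lookup; the values below are illustrative assumptions, not from the source:

    # sketch of padinfo.json, written as the dict the scripts load;
    # every value here is an example -- your instance will differ
    info = {
        "protocol": "http",
        "hostname": "localhost",
        "port": 9001,
        "apiurl": "/api/",
        "apiversion": "1.2.9",
        "apikey": "contents of your Etherpad APIKEY.txt"
    }
    apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
    print apiurl   # http://localhost:9001/api/1.2.9/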

etherdump/commands/dumpcsv.py (28 lines changed)

@@ -6,6 +6,7 @@ from datetime import datetime
 from urllib import urlencode
 from urllib2 import urlopen, HTTPError, URLError
 from csv import writer
+from math import ceil, floor
 
 """
 Dumps a CSV of all pads with columns
@@ -32,6 +33,7 @@ def main (args):
     p = ArgumentParser("")
     p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
     p.add_argument("--format", default="csv", help="output format: csv (default), json")
+    p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False")
     args = p.parse_args(args)
 
     with open(args.padinfo) as f:
@@ -41,11 +43,23 @@ def main (args):
     data['apikey'] = info['apikey']
     requesturl = apiurl+'listAllPads?'+urlencode(data)
-    results = jsonload(requesturl)['data']['padIDs']
-    results.sort()
+    padids = jsonload(requesturl)['data']['padIDs']
+    padids.sort()
+    numpads = len(padids)
+    maxmsglen = 0
+    count = 0
 
     out.writerow(("padid", "groupid", "revisions", "lastedited", "author_ids"))
-    for padid in results:
-        print (u"{0}".format(padid), file=sys.stderr)
+    for i, padid in enumerate(padids):
+        p = (float(i) / numpads)
+        percentage = int(floor(p*100))
+        bars = int(ceil(p*20))
+        bar = ("*"*bars) + ("-"*(20-bars))
+        msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid)
+        if len(msg) > maxmsglen:
+            maxmsglen = len(msg)
+        sys.stderr.write("\r{0}".format(" "*maxmsglen))
+        sys.stderr.write(msg.encode("utf-8"))
+        sys.stderr.flush()
         m = groupnamepat.match(padid)
         if m:
             groupname = m.group(1)
@@ -56,10 +70,16 @@ def main (args):
         data['padID'] = padid.encode("utf-8")
         revisions = jsonload(apiurl+'getRevisionsCount?'+urlencode(data))['data']['revisions']
+        if (revisions == 0) and not args.zerorevs:
+            continue
         lastedited_raw = jsonload(apiurl+'getLastEdited?'+urlencode(data))['data']['lastEdited']
         lastedited_iso = datetime.fromtimestamp(int(lastedited_raw)/1000).isoformat()
         author_ids = jsonload(apiurl+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']
         author_ids = u" ".join(author_ids).encode("utf-8")
         out.writerow((padidnogroup.encode("utf-8"), groupname.encode("utf-8"), revisions, lastedited_iso, author_ids))
+        count += 1
+    print("\nWrote {0} rows...".format(count), file=sys.stderr)

(Note: the CSV header row above is reordered to "revisions", "lastedited" so that it matches the column order actually written per row.)

etherdump/commands/gethtml.py (new file, 34 lines)

@@ -0,0 +1,34 @@
#!/usr/bin/env python
from argparse import ArgumentParser
import json
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError

def main(args):
    p = ArgumentParser("")
    p.add_argument("padid", help="the padid")
    p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
    p.add_argument("--showurl", default=False, action="store_true")
    p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
    p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
    args = p.parse_args(args)

    with open(args.padinfo) as f:
        info = json.load(f)
    apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
    data = {}
    data['apikey'] = info['apikey']
    data['padID'] = args.padid
    if args.rev != None:
        data['rev'] = args.rev
    requesturl = apiurl+'getHTML?'+urlencode(data)
    if args.showurl:
        print requesturl
    else:
        results = json.load(urlopen(requesturl))['data']
        if args.format == "json":
            print json.dumps(results)
        else:
            print results['html'].encode("utf-8")
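
Because main() takes its argument list explicitly, the command can also be driven from Python. A hypothetical call (the pad name and revision are invented, and a padinfo.json is assumed in the working directory):

    from etherdump.commands.gethtml import main
    main(["mypad", "--rev", "5"])   # prints revision 5 of pad "mypad" as HTML on stdout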

etherdump/commands/text.py → etherdump/commands/gettext.py (renamed, 8 lines changed)

@@ -12,6 +12,7 @@ def main(args):
     p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
     p.add_argument("--showurl", default=False, action="store_true")
     p.add_argument("--format", default="text", help="output format, can be: text, json; default: text")
+    p.add_argument("--rev", type=int, default=None, help="revision, default: latest")
     args = p.parse_args(args)
 
     with open(args.padinfo) as f:
@@ -19,7 +20,9 @@ def main(args):
     apiurl = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
     data = {}
     data['apikey'] = info['apikey']
-    data['padID'] = args.padid.encode("utf-8")
+    data['padID'] = args.padid # is utf-8 encoded
+    if args.rev != None:
+        data['rev'] = args.rev
     requesturl = apiurl+'getText?'+urlencode(data)
     if args.showurl:
         print requesturl
@@ -29,6 +32,3 @@ def main(args):
             print json.dumps(results)
         else:
             print results['text'].encode("utf-8")
-
-# To save to file run:
-# python gettext.py > copy.txt
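
The dropped .encode("utf-8") reflects that, in Python 2, sys.argv entries are already byte strings (typically utf-8 as delivered by the shell), so urlencode can quote them as-is. A tiny check with an invented pad name:

    from urllib import urlencode
    print urlencode({'padID': 'caf\xc3\xa9'})   # padID=caf%C3%A9  (utf-8 bytes for "café")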

etherdump/commands/showmeta.py (new file, 32 lines)

@@ -0,0 +1,32 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import json, sys, re
from common import *

"""
Extract and output selected fields of metadata
"""

def main (args):
    p = ArgumentParser("")
    p.add_argument("--path", default=None, help="read from a meta.json file")
    p.add_argument("--padid", default=None, help="read meta for this padid")
    p.add_argument("--format", default="{padid}", help="format str, default: {padid}")
    args = p.parse_args(args)

    path = args.path
    if not path and args.padid:
        path = padpath(args.padid) + ".meta.json"
    if not path:
        print ("Must specify either --path or --padid")
        sys.exit(-1)

    with open(path) as f:
        meta = json.load(f)

    formatstr = args.format.decode("utf-8")
    formatstr = re.sub(ur"{(\w+)}", r"{0[\1]}", formatstr)
    print (formatstr.format(meta).encode("utf-8"))
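
The re.sub line rewrites a user-friendly template like "{padid}" into positional-index form so that str.format can pull values out of the loaded dict. A minimal sketch with invented metadata values:

    import re
    meta = {"padid": u"notes", "revisions": 12}   # example metadata
    formatstr = u"{padid} ({revisions} revs)"
    formatstr = re.sub(ur"{(\w+)}", r"{0[\1]}", formatstr)   # u"{0[padid]} ({0[revisions]} revs)"
    print formatstr.format(meta)                  # notes (12 revs)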

etherdump/commands/sync.py (new file, 96 lines)

@@ -0,0 +1,96 @@
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import sys, json, re, os
from datetime import datetime
from urllib import urlencode
from urllib2 import urlopen, HTTPError, URLError
from math import ceil, floor
from common import *

"""
sync(meta):
    Update meta data files for those that have changed.
    Check for changed pads by looking at revisions & comparing to existing
"""

def jsonload (url):
    f = urlopen(url)
    data = f.read()
    f.close()
    return json.loads(data)

def load_padinfo(p):
    with open(p) as f:
        info = json.load(f)
    info['api'] = "{0[protocol]}://{0[hostname]}:{0[port]}{0[apiurl]}{0[apiversion]}/".format(info)
    return info

def main (args):
    p = ArgumentParser("")
    p.add_argument("--padinfo", default="padinfo.json", help="padinfo, default: padinfo.json")
    p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False")
    p.add_argument("--pub", default="pub", help="pub path for output, default: pub")
    p.add_argument("--group", default="g", help="group path for output, default: g")
    p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
    args = p.parse_args(args)

    info = load_padinfo(args.padinfo)
    data = {}
    data['apikey'] = info['apikey']
    padids = jsonload(info['api']+'listAllPads?'+urlencode(data))['data']['padIDs']
    padids.sort()
    numpads = len(padids)
    maxmsglen = 0
    count = 0
    for i, padid in enumerate(padids):
        if args.skip != None and i<args.skip:
            continue
        p = (float(i) / numpads)
        percentage = int(floor(p*100))
        bars = int(ceil(p*20))
        bar = ("*"*bars) + ("-"*(20-bars))
        msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), numpads, padid)
        if len(msg) > maxmsglen:
            maxmsglen = len(msg)
        sys.stderr.write("\r{0}".format(" "*maxmsglen))
        sys.stderr.write(msg.encode("utf-8"))
        sys.stderr.flush()
        data['padID'] = padid.encode("utf-8")
        p = padpath(padid, args.pub, args.group)
        metapath = p + ".meta.json"
        revisions = None
        if os.path.exists(metapath):
            with open(metapath) as f:
                meta = json.load(f)
            revisions = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
            if meta['revisions'] == revisions:
                continue

        meta = {'padid': padid.encode("utf-8")}
        if revisions == None:
            meta['revisions'] = jsonload(info['api']+'getRevisionsCount?'+urlencode(data))['data']['revisions']
        else:
            meta['revisions'] = revisions

        if (meta['revisions'] == 0) and (not args.zerorevs):
            # print("Skipping zero revs", file=sys.stderr)
            continue

        count += 1

        # todo: load more metadata!
        meta['lastedited_raw'] = int(jsonload(info['api']+'getLastEdited?'+urlencode(data))['data']['lastEdited'])
        meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw'])/1000).isoformat()
        meta['author_ids'] = jsonload(info['api']+'listAuthorsOfPad?'+urlencode(data))['data']['authorIDs']

        # save it
        try:
            os.makedirs(os.path.split(metapath)[0])
        except OSError:
            pass
        with open(metapath, "w") as f:
            json.dump(meta, f)
    print("\nWrote {0} files...".format(count), file=sys.stderr)