Browse Source

index

add-quote-import
Michael Murtaugh 10 years ago
parent
commit
52828c9d7c
  1. 79
      etherdump

79
etherdump

@ -15,6 +15,9 @@ PADINFO_DEFAULTS = {
"apiurl": "/api/" "apiurl": "/api/"
} }
MODULE_PATH = (os.path.dirname(__file__))
TEMPLATES_PATH = os.path.join(MODULE_PATH, "templates")
verbose = False verbose = False
def pad_split_group (n): def pad_split_group (n):
@ -73,14 +76,23 @@ class PadServer (object):
else: else:
return self.protocol+"://"+self.hostname+"/public_pad/"+padID return self.protocol+"://"+self.hostname+"/public_pad/"+padID
def get_template_env (tpath=None):
def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_existing=False, template=None):
if template != None:
import jinja2 import jinja2
with open(template) as f: paths = []
template = jinja2.Template(f.read().decode("utf-8")) if tpath and os.path.isdir(tpath):
paths.append(tpath)
for padid in padids: paths.append(TEMPLATES_PATH)
loader = jinja2.FileSystemLoader(paths)
env = jinja2.Environment(loader=loader)
return env
# template = env.get_template('pad.html')
# print template.render(the='variables', go='here').encode("utf-8")
def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_existing=False, templates=None):
template_env = get_template_env(templates)
pad_template = template_env.get_template("pad.html")
numpads = len(padids)
for i, padid in enumerate(padids):
group_id, pad_name = pad_split_group(padid) group_id, pad_name = pad_split_group(padid)
if group_id: if group_id:
try: try:
@ -101,6 +113,9 @@ def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_exis
if verbose: if verbose:
print (u"Saving to {0}".format(fp).encode("utf-8"), file=sys.stderr) print (u"Saving to {0}".format(fp).encode("utf-8"), file=sys.stderr)
else:
sys.stderr.write("\rDumping pads... [{0}/{1}]".format(i+1, numpads))
sys.stderr.flush()
if skip_existing: if skip_existing:
if os.path.exists(fp+".json"): if os.path.exists(fp+".json"):
@ -112,6 +127,7 @@ def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_exis
'group_id': group_id, 'group_id': group_id,
'pad_name': pad_name 'pad_name': pad_name
} }
meta['last_edited'] = padserver.getPadLastEdited(padid).isoformat() meta['last_edited'] = padserver.getPadLastEdited(padid).isoformat()
# Write Text # Write Text
@ -132,18 +148,19 @@ def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_exis
html = padserver.getPadHTML(padid) html = padserver.getPadHTML(padid)
meta['html_path'] = htmlpath meta['html_path'] = htmlpath
meta['html_length'] = len(html) meta['html_length'] = len(html)
if template: if pad_template:
t = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False) t = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
body = t.find(".//body") body = t.find(".//body")
title = padid title = padid
editurl = padserver.getPadURL(padid) editurl = padserver.getPadURL(padid)
meta['url'] = editurl meta['url'] = editurl
f.write(template.render( json_dump = json.dumps(meta)
f.write(pad_template.render(
body=content(body), body=content(body),
title=title, title=title,
editurl=editurl, editurl=editurl,
sourceurl=textpath, sourceurl=textpath,
metadata_json=json.dumps(meta))) # unicode error HERE! metadata_json=json_dump).encode("utf-8")) # unicode error HERE!
else: else:
f.write(html.encode("utf-8")) f.write(html.encode("utf-8"))
@ -156,6 +173,11 @@ def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_exis
if sleeptime: if sleeptime:
time.sleep(sleeptime) time.sleep(sleeptime)
if not verbose:
sys.stderr.write("\rDumping pads... [{0}] \n".format(numpads))
sys.stderr.flush()
def humanize_bytes(bytes, precision=0): def humanize_bytes(bytes, precision=0):
"""Return a humanized string representation of a number of bytes. """Return a humanized string representation of a number of bytes.
@ -225,16 +247,13 @@ if __name__ == "__main__":
parser.add_argument('--skip-existing', default=False, action="store_true", help='skip existing files on dump') parser.add_argument('--skip-existing', default=False, action="store_true", help='skip existing files on dump')
parser.add_argument('--limit', default=None, type=int, help='(dump) stop after limit items') parser.add_argument('--limit', default=None, type=int, help='(dump) stop after limit items')
# DUMP parser.add_argument('--templates', default=os.path.join(os.getcwd(), "templates"), help='(addition) templates path, default: ./templates')
parser.add_argument('--template', default="templates/pad.html", help='path for (dump) template, default: templates/pad.html')
# OPTIONS SPECIFIC TO CREATEINDEX # INDEX-specific opts
parser.add_argument('--exclude-groups', default=False, action="store_true", help='(createindex) ignore groups') parser.add_argument('--title', default="etherpad index & archive", help='(index) title')
parser.add_argument('--exclude-groups', default=False, action="store_true", help='(index) ignore groups')
parser.add_argument('--groupinfo', default=None, help='(createindex) groupinfo json file') parser.add_argument('--groupinfo', default=None, help='(createindex) groupinfo json file')
parser.add_argument('--indextemplate', default="templates/index.html", help='(createindex) path for template, default: templates/index.html') parser.add_argument('--output', default=None, help='(index) path for output (default stdout)')
parser.add_argument('--indextitle', default="etherpad archive & index", help='(createindex) title')
parser.add_argument('--indexcss', default="styles.css", help='(createindex) index: css url')
parser.add_argument('--output', default=None, help='(createindex) path for output (default stdout)')
args = parser.parse_args() args = parser.parse_args()
@ -301,21 +320,23 @@ if __name__ == "__main__":
padids = padserver.listAllPads() padids = padserver.listAllPads()
if args.limit: if args.limit:
padids = padids[:args.limit] padids = padids[:args.limit]
dumpPads( dumpPads(
padserver, padserver,
padids, padids,
args.pubpath, args.pubpath,
args.grouppath, args.grouppath,
args.skip_existing, args.skip_existing,
template=args.template) templates=args.templates)
if verbose: if verbose:
print ("Completed in {0:0.0f} seconds".format(time.time()-start), file=sys.stderr) print ("Completed in {0:0.0f} seconds".format(time.time()-start), file=sys.stderr)
elif cmd == "createindex": elif cmd == "index":
def get_pads(groupinfo=None): def get_pads(groupinfo=None):
pads = padids_from_path(args.pubpath) pads = padids_from_path(args.pubpath)
print (("padids_from_path", args.pubpath, pads), file=sys.stderr) # print (("padids_from_path", args.pubpath, pads), file=sys.stderr)
if not args.exclude_groups and os.path.exists(args.grouppath): if not args.exclude_groups and os.path.exists(args.grouppath):
groups = [os.path.join(args.grouppath, x) for x in os.listdir(args.grouppath)] groups = [os.path.join(args.grouppath, x) for x in os.listdir(args.grouppath)]
groups = [x for x in groups if os.path.isdir(x)] groups = [x for x in groups if os.path.isdir(x)]
@ -346,16 +367,16 @@ if __name__ == "__main__":
out = open(args.output, "w") out = open(args.output, "w")
import jinja2 import jinja2
with open(args.indextemplate) as f: env = get_template_env(args.templates)
template = jinja2.Template(f.read().decode("utf-8")) index_template = env.get_template("index.html")
out.write(template.render(
title=args.indextitle, out.write(index_template.render(
css=args.indexcss, pads = pads,
pads = pads title = args.title
)) ).encode("utf-8"))
if args.output: if args.output:
output.close() out.close()
else: else:

Loading…
Cancel
Save