diff --git a/etherdump b/etherdump index 1b3db4f..c1b0246 100755 --- a/etherdump +++ b/etherdump @@ -15,6 +15,9 @@ PADINFO_DEFAULTS = { "apiurl": "/api/" } +MODULE_PATH = (os.path.dirname(__file__)) +TEMPLATES_PATH = os.path.join(MODULE_PATH, "templates") + verbose = False def pad_split_group (n): @@ -73,14 +76,23 @@ class PadServer (object): else: return self.protocol+"://"+self.hostname+"/public_pad/"+padID - -def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_existing=False, template=None): - if template != None: - import jinja2 - with open(template) as f: - template = jinja2.Template(f.read().decode("utf-8")) - - for padid in padids: +def get_template_env (tpath=None): + import jinja2 + paths = [] + if tpath and os.path.isdir(tpath): + paths.append(tpath) + paths.append(TEMPLATES_PATH) + loader = jinja2.FileSystemLoader(paths) + env = jinja2.Environment(loader=loader) + return env + # template = env.get_template('pad.html') + # print template.render(the='variables', go='here').encode("utf-8") + +def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_existing=False, templates=None): + template_env = get_template_env(templates) + pad_template = template_env.get_template("pad.html") + numpads = len(padids) + for i, padid in enumerate(padids): group_id, pad_name = pad_split_group(padid) if group_id: try: @@ -101,6 +113,9 @@ def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_exis if verbose: print (u"Saving to {0}".format(fp).encode("utf-8"), file=sys.stderr) + else: + sys.stderr.write("\rDumping pads... [{0}/{1}]".format(i+1, numpads)) + sys.stderr.flush() if skip_existing: if os.path.exists(fp+".json"): @@ -112,6 +127,7 @@ def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_exis 'group_id': group_id, 'pad_name': pad_name } + meta['last_edited'] = padserver.getPadLastEdited(padid).isoformat() # Write Text @@ -132,18 +148,19 @@ def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_exis html = padserver.getPadHTML(padid) meta['html_path'] = htmlpath meta['html_length'] = len(html) - if template: + if pad_template: t = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False) body = t.find(".//body") title = padid editurl = padserver.getPadURL(padid) meta['url'] = editurl - f.write(template.render( + json_dump = json.dumps(meta) + f.write(pad_template.render( body=content(body), title=title, editurl=editurl, sourceurl=textpath, - metadata_json=json.dumps(meta))) # unicode error HERE! + metadata_json=json_dump).encode("utf-8")) # unicode error HERE! else: f.write(html.encode("utf-8")) @@ -156,6 +173,11 @@ def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_exis if sleeptime: time.sleep(sleeptime) + if not verbose: + sys.stderr.write("\rDumping pads... [{0}] \n".format(numpads)) + sys.stderr.flush() + + def humanize_bytes(bytes, precision=0): """Return a humanized string representation of a number of bytes. @@ -225,16 +247,13 @@ if __name__ == "__main__": parser.add_argument('--skip-existing', default=False, action="store_true", help='skip existing files on dump') parser.add_argument('--limit', default=None, type=int, help='(dump) stop after limit items') - # DUMP - parser.add_argument('--template', default="templates/pad.html", help='path for (dump) template, default: templates/pad.html') + parser.add_argument('--templates', default=os.path.join(os.getcwd(), "templates"), help='(addition) templates path, default: ./templates') - # OPTIONS SPECIFIC TO CREATEINDEX - parser.add_argument('--exclude-groups', default=False, action="store_true", help='(createindex) ignore groups') + # INDEX-specific opts + parser.add_argument('--title', default="etherpad index & archive", help='(index) title') + parser.add_argument('--exclude-groups', default=False, action="store_true", help='(index) ignore groups') parser.add_argument('--groupinfo', default=None, help='(createindex) groupinfo json file') - parser.add_argument('--indextemplate', default="templates/index.html", help='(createindex) path for template, default: templates/index.html') - parser.add_argument('--indextitle', default="etherpad archive & index", help='(createindex) title') - parser.add_argument('--indexcss', default="styles.css", help='(createindex) index: css url') - parser.add_argument('--output', default=None, help='(createindex) path for output (default stdout)') + parser.add_argument('--output', default=None, help='(index) path for output (default stdout)') args = parser.parse_args() @@ -301,21 +320,23 @@ if __name__ == "__main__": padids = padserver.listAllPads() if args.limit: padids = padids[:args.limit] + dumpPads( padserver, padids, args.pubpath, args.grouppath, args.skip_existing, - template=args.template) + templates=args.templates) + if verbose: print ("Completed in {0:0.0f} seconds".format(time.time()-start), file=sys.stderr) - elif cmd == "createindex": + elif cmd == "index": def get_pads(groupinfo=None): pads = padids_from_path(args.pubpath) - print (("padids_from_path", args.pubpath, pads), file=sys.stderr) + # print (("padids_from_path", args.pubpath, pads), file=sys.stderr) if not args.exclude_groups and os.path.exists(args.grouppath): groups = [os.path.join(args.grouppath, x) for x in os.listdir(args.grouppath)] groups = [x for x in groups if os.path.isdir(x)] @@ -346,16 +367,16 @@ if __name__ == "__main__": out = open(args.output, "w") import jinja2 - with open(args.indextemplate) as f: - template = jinja2.Template(f.read().decode("utf-8")) - out.write(template.render( - title=args.indextitle, - css=args.indexcss, - pads = pads - )) + env = get_template_env(args.templates) + index_template = env.get_template("index.html") + + out.write(index_template.render( + pads = pads, + title = args.title + ).encode("utf-8")) if args.output: - output.close() + out.close() else: