Browse Source

index

add-quote-import
Michael Murtaugh 10 years ago
parent
commit
52828c9d7c
  1. 81
      etherdump

81
etherdump

@ -15,6 +15,9 @@ PADINFO_DEFAULTS = {
"apiurl": "/api/"
}
MODULE_PATH = (os.path.dirname(__file__))
TEMPLATES_PATH = os.path.join(MODULE_PATH, "templates")
verbose = False
def pad_split_group (n):
@ -73,14 +76,23 @@ class PadServer (object):
else:
return self.protocol+"://"+self.hostname+"/public_pad/"+padID
def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_existing=False, template=None):
if template != None:
import jinja2
with open(template) as f:
template = jinja2.Template(f.read().decode("utf-8"))
for padid in padids:
def get_template_env (tpath=None):
    """Build the jinja2 environment used to look up templates by name.

    Templates are searched first in ``tpath`` (only when it is given and is
    an existing directory) and then in TEMPLATES_PATH, the "templates"
    folder next to this module, so a file in ``tpath`` takes precedence
    over the module's own template of the same name.

    Typical use of the returned environment:
        template = env.get_template('pad.html')
        template.render(the='variables', go='here').encode("utf-8")
    """
    import jinja2

    search_paths = [TEMPLATES_PATH]
    if tpath and os.path.isdir(tpath):
        # Caller-supplied directory goes in front of the module's own one.
        search_paths.insert(0, tpath)
    return jinja2.Environment(loader=jinja2.FileSystemLoader(search_paths))
def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_existing=False, templates=None):
template_env = get_template_env(templates)
pad_template = template_env.get_template("pad.html")
numpads = len(padids)
for i, padid in enumerate(padids):
group_id, pad_name = pad_split_group(padid)
if group_id:
try:
@ -101,6 +113,9 @@ def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_exis
if verbose:
print (u"Saving to {0}".format(fp).encode("utf-8"), file=sys.stderr)
else:
sys.stderr.write("\rDumping pads... [{0}/{1}]".format(i+1, numpads))
sys.stderr.flush()
if skip_existing:
if os.path.exists(fp+".json"):
@ -112,6 +127,7 @@ def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_exis
'group_id': group_id,
'pad_name': pad_name
}
meta['last_edited'] = padserver.getPadLastEdited(padid).isoformat()
# Write Text
@ -132,18 +148,19 @@ def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_exis
html = padserver.getPadHTML(padid)
meta['html_path'] = htmlpath
meta['html_length'] = len(html)
if template:
if pad_template:
t = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
body = t.find(".//body")
title = padid
editurl = padserver.getPadURL(padid)
meta['url'] = editurl
f.write(template.render(
json_dump = json.dumps(meta)
f.write(pad_template.render(
body=content(body),
title=title,
editurl=editurl,
sourceurl=textpath,
metadata_json=json.dumps(meta))) # unicode error HERE!
metadata_json=json_dump).encode("utf-8")) # unicode error HERE!
else:
f.write(html.encode("utf-8"))
@ -156,6 +173,11 @@ def dumpPads (padserver, padids, pub_path, group_path, sleeptime=0.01, skip_exis
if sleeptime:
time.sleep(sleeptime)
if not verbose:
sys.stderr.write("\rDumping pads... [{0}] \n".format(numpads))
sys.stderr.flush()
def humanize_bytes(bytes, precision=0):
"""Return a humanized string representation of a number of bytes.
@ -225,16 +247,13 @@ if __name__ == "__main__":
parser.add_argument('--skip-existing', default=False, action="store_true", help='skip existing files on dump')
parser.add_argument('--limit', default=None, type=int, help='(dump) stop after limit items')
# DUMP
parser.add_argument('--template', default="templates/pad.html", help='path for (dump) template, default: templates/pad.html')
parser.add_argument('--templates', default=os.path.join(os.getcwd(), "templates"), help='(addition) templates path, default: ./templates')
# OPTIONS SPECIFIC TO CREATEINDEX
parser.add_argument('--exclude-groups', default=False, action="store_true", help='(createindex) ignore groups')
# INDEX-specific opts
parser.add_argument('--title', default="etherpad index & archive", help='(index) title')
parser.add_argument('--exclude-groups', default=False, action="store_true", help='(index) ignore groups')
parser.add_argument('--groupinfo', default=None, help='(createindex) groupinfo json file')
parser.add_argument('--indextemplate', default="templates/index.html", help='(createindex) path for template, default: templates/index.html')
parser.add_argument('--indextitle', default="etherpad archive & index", help='(createindex) title')
parser.add_argument('--indexcss', default="styles.css", help='(createindex) index: css url')
parser.add_argument('--output', default=None, help='(createindex) path for output (default stdout)')
parser.add_argument('--output', default=None, help='(index) path for output (default stdout)')
args = parser.parse_args()
@ -301,21 +320,23 @@ if __name__ == "__main__":
padids = padserver.listAllPads()
if args.limit:
padids = padids[:args.limit]
dumpPads(
padserver,
padids,
args.pubpath,
args.grouppath,
# NOTE(review): this is the fifth positional argument, which dumpPads
# declares as `sleeptime`, so the flag lands in sleeptime and skip_existing
# keeps its default of False. Presumably skip_existing=args.skip_existing
# was intended -- confirm and pass it by keyword.
args.skip_existing,
template=args.template)
templates=args.templates)
if verbose:
print ("Completed in {0:0.0f} seconds".format(time.time()-start), file=sys.stderr)
elif cmd == "createindex":
elif cmd == "index":
def get_pads(groupinfo=None):
pads = padids_from_path(args.pubpath)
print (("padids_from_path", args.pubpath, pads), file=sys.stderr)
# print (("padids_from_path", args.pubpath, pads), file=sys.stderr)
if not args.exclude_groups and os.path.exists(args.grouppath):
groups = [os.path.join(args.grouppath, x) for x in os.listdir(args.grouppath)]
groups = [x for x in groups if os.path.isdir(x)]
@ -346,16 +367,16 @@ if __name__ == "__main__":
out = open(args.output, "w")
import jinja2
with open(args.indextemplate) as f:
template = jinja2.Template(f.read().decode("utf-8"))
out.write(template.render(
title=args.indextitle,
css=args.indexcss,
pads = pads
))
env = get_template_env(args.templates)
index_template = env.get_template("index.html")
out.write(index_template.render(
pads = pads,
title = args.title
).encode("utf-8"))
if args.output:
output.close()
out.close()
else:

Loading…
Cancel
Save