|
|
@ -1,7 +1,8 @@ |
|
|
|
from __future__ import print_function |
|
|
|
from argparse import ArgumentParser |
|
|
|
import sys, json, re, os |
|
|
|
import sys, json, re, os, time |
|
|
|
from datetime import datetime |
|
|
|
import dateutil.parser |
|
|
|
|
|
|
|
try: |
|
|
|
# python2 |
|
|
@ -27,6 +28,7 @@ index: |
|
|
|
""" |
|
|
|
|
|
|
|
def group (items, key=lambda x: x): |
|
|
|
""" returns a list of lists, of items grouped by a key function """ |
|
|
|
ret = [] |
|
|
|
keys = {} |
|
|
|
for item in items: |
|
|
@ -39,8 +41,19 @@ def group (items, key=lambda x: x): |
|
|
|
ret.append(keys[k]) |
|
|
|
return ret |
|
|
|
|
|
|
|
# def base (x): |
|
|
|
# return re.sub(r"(\.raw\.html)|(\.diff\.html)|(\.meta\.json)|(\.raw\.txt)$", "", x) |
|
|
|
|
|
|
|
def splitextlong (x):
    """ split "long" extensions, i.e. foo.bar.baz => ('foo', '.bar.baz')

    The split is made at the first dot of the final path component only, so
    dots in directory parts are left alone:
    "./pads/foo.meta.json" => ('./pads/foo', '.meta.json')

    Returns a (root, extension) tuple; extension is '' when the final
    component contains no dot.
    """
    # Index at which the final path component begins; searching from here
    # keeps "./" prefixes and dotted directory names out of the extension.
    start = len(x) - len(os.path.basename(x))
    dot = x.find(".", start)
    if dot == -1:
        return x, ''
    return x[:dot], x[dot:]
|
|
|
|
|
|
|
def base (x):
    """ strip the "long" extension from a name, i.e. foo.raw.html => 'foo' """
    # Single return path: relies on splitextlong so that any multi-part
    # extension is removed, not only a fixed list of known suffixes.
    return splitextlong(x)[0]
|
|
|
|
|
|
|
def excerpt (t, chars=25): |
|
|
|
if len(t) > chars: |
|
|
@ -60,10 +73,18 @@ def url_base (url): |
|
|
|
ret += "/" |
|
|
|
return ret |
|
|
|
|
|
|
|
def datetimeformat (t, format='%Y-%m-%d %H:%M:%S'):
    """ template filter: render a timestamp as text using a strftime format

    t may be:
      - a date string, parsed with dateutil
      - a datetime object, formatted as-is
      - a number of seconds since the epoch, rendered in local time
    """
    # isinstance (not an exact type comparison) so that str subclasses and
    # python2 unicode strings are parsed rather than handed to time.localtime
    if isinstance(t, (str, type(u""))):
        return dateutil.parser.parse(t).strftime(format)
    # datetime objects cannot go through time.localtime (TypeError)
    if isinstance(t, datetime):
        return t.strftime(format)
    return time.strftime(format, time.localtime(t))
|
|
|
|
|
|
|
def main (args): |
|
|
|
p = ArgumentParser("Convert dumped files to a document via a template.") |
|
|
|
|
|
|
|
p.add_argument("input", nargs="+", help="filenames (uses .meta.json files)") |
|
|
|
p.add_argument("input", nargs="+", help="Files to list (.meta.json files)") |
|
|
|
|
|
|
|
p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in") |
|
|
|
p.add_argument("--template", default="index.html", help="template name, built-ins include index.html, rss.xml; default: index.html") |
|
|
|
p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: ./.etherdump/settings.json") |
|
|
@ -103,19 +124,55 @@ def main (args): |
|
|
|
|
|
|
|
env = Environment(loader=FileSystemLoader(tmpath)) |
|
|
|
env.filters["excerpt"] = excerpt |
|
|
|
env.filters["datetimeformat"] = datetimeformat |
|
|
|
template = env.get_template(args.template) |
|
|
|
|
|
|
|
info = loadpadinfo(args.padinfo) |
|
|
|
|
|
|
|
inputs = args.input |
|
|
|
inputs.sort() |
|
|
|
inputs = group(inputs, base) |
|
|
|
# Use "base" to strip (longest) extensions |
|
|
|
# inputs = group(inputs, base) |
|
|
|
|
|
|
|
def wrappath (p):
    """ describe file p as a "version" entry: its ./-prefixed location as both
    url and path, a fixed code of 200 and the last extension (no dot) as type """
    location = "./{0}".format(p)
    _, suffix = os.path.splitext(p)
    return dict(url=location, path=location, code=200, type=suffix[1:])
|
|
|
|
|
|
|
def loadmeta(paths): |
|
|
|
def metaforpaths (paths):
    """ build a stand-in pad record for files that have no metadata of their own

    paths: non-empty list of filenames; the first one names the pad.
    Returns a dict with 'pad'/'padid' (extension-less name of the first path),
    'versions' (one wrappath entry per file) and 'lastedited_iso' /
    'lastedited_raw', both taken from the newest modification time.
    """
    ret = {}
    pid = base(paths[0])
    ret['pad'] = ret['padid'] = pid
    ret['versions'] = [wrappath(x) for x in paths]
    # Newest mtime among the files; the raw and iso values must describe the
    # same moment, so both are derived from this single value.
    lastedited = max(os.stat(p).st_mtime for p in paths)
    ret["lastedited_iso"] = datetime.fromtimestamp(lastedited).strftime("%Y-%m-%dT%H:%M:%S")
    ret["lastedited_raw"] = lastedited
    return ret
|
|
|
|
|
|
|
def loadmeta(p):
    """ return the parsed JSON of a .meta.json file, or None for any other file """
    # Anything that is not a metadata file yields None
    if not p.endswith(".meta.json"):
        return None
    with open(p) as f:
        return json.load(f)
|
|
|
# # IF there is a .meta.json, load it & MERGE with other files |
|
|
|
# if ret: |
|
|
|
# # TODO: merge with other files |
|
|
|
# for p in paths: |
|
|
|
# if "./"+p not in ret['versions']: |
|
|
|
# ret['versions'].append(wrappath(p)) |
|
|
|
# return ret |
|
|
|
# else: |
|
|
|
# return metaforpaths(paths) |
|
|
|
|
|
|
|
def fixdates (padmeta): |
|
|
|
d = dateutil.parser.parse(padmeta["lastedited_iso"]) |
|
|
@ -124,9 +181,59 @@ def main (args): |
|
|
|
return padmeta |
|
|
|
|
|
|
|
pads = map(loadmeta, inputs) |
|
|
|
pads = [x for x in pads if x != None] |
|
|
|
pads = map(fixdates, pads) |
|
|
|
args.pads = list(pads) |
|
|
|
|
|
|
|
inputs = args.input |
|
|
|
inputs.sort() |
|
|
|
removelist = [] |
|
|
|
|
|
|
|
def has_version (padinfo, path):
    """ list the entries of padinfo['versions'] whose 'path' equals "./" + path
    (an empty, falsy list when there is none) """
    matches = []
    for version in padinfo['versions']:
        if 'path' not in version:
            continue
        if version['path'] == "./"+path:
            matches.append(version)
    return matches
|
|
|
|
|
|
|
pads_by_base = {} |
|
|
|
for p in args.pads: |
|
|
|
# print ("Trying padid", p['padid'], file=sys.stderr) |
|
|
|
padbase = os.path.splitext(p['padid'])[0] |
|
|
|
pads_by_base[padbase] = p |
|
|
|
padbases = list(pads_by_base.keys()) |
|
|
|
# SORT THEM LONGEST FIRST TO ensure that LONGEST MATCHES MATCH |
|
|
|
padbases.sort(key=lambda x: len(x), reverse=True) |
|
|
|
# print ("PADBASES", file=sys.stderr) |
|
|
|
# for pb in padbases: |
|
|
|
# print (" ", pb, file=sys.stderr) |
|
|
|
|
|
|
|
def could_have_base (x, y):
    """ True when x is exactly y, or is y followed by a "." and anything else """
    if x == y:
        return True
    if not x.startswith(y):
        return False
    remainder = x[len(y):]
    return remainder.startswith(".")
|
|
|
|
|
|
|
def get_best_pad (x):
    """ return the pad of the first entry in padbases that x could belong to
    (see could_have_base), or None when no entry fits """
    candidates = (pads_by_base[pb] for pb in padbases if could_have_base(x, pb))
    return next(candidates, None)
|
|
|
|
|
|
|
for x in inputs: |
|
|
|
# pair input with a pad if possible |
|
|
|
p = get_best_pad(x) |
|
|
|
if p: |
|
|
|
if not has_version(p, x): |
|
|
|
print ("Grouping file {0} with pad {1}".format(x, p['padid']), file=sys.stderr) |
|
|
|
p['versions'].append(wrappath(x)) |
|
|
|
# else: |
|
|
|
# print ("Skipping existing version {0} ({1})...".format(x, p['padid']), file=sys.stderr) |
|
|
|
removelist.append(x) |
|
|
|
# Remove matched files |
|
|
|
for x in removelist: |
|
|
|
inputs.remove(x) |
|
|
|
# print ("Remaining files:", file=sys.stderr) |
|
|
|
# for x in inputs: |
|
|
|
# print (x, file=sys.stderr) |
|
|
|
# print (file=sys.stderr) |
|
|
|
# Add "fake" pads for remaining files |
|
|
|
for x in inputs: |
|
|
|
args.pads.append(metaforpaths([x])) |
|
|
|
|
|
|
|
if args.timestamp == None: |
|
|
|
args.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") |
|
|
|
|
|
|
@ -165,18 +272,21 @@ def main (args): |
|
|
|
for v in p["versions"]: |
|
|
|
t = v["type"] |
|
|
|
versions_by_type[t] = v |
|
|
|
with open (versions_by_type["text"]["path"]) as f: |
|
|
|
p["text"] = f.read() |
|
|
|
|
|
|
|
# ADD IN LINK |
|
|
|
if "text" in versions_by_type: |
|
|
|
with open (versions_by_type["text"]["path"]) as f: |
|
|
|
p["text"] = f.read() |
|
|
|
|
|
|
|
# ADD IN LINK TO PAD AS "link" |
|
|
|
for v in linkversions: |
|
|
|
vdata = versions_by_type[v] |
|
|
|
try: |
|
|
|
if v == "pad" or os.path.exists(vdata["path"]): |
|
|
|
p["link"] = absurl(vdata["url"], linkbase) |
|
|
|
break |
|
|
|
except KeyError as e: |
|
|
|
pass |
|
|
|
if v in versions_by_type: |
|
|
|
vdata = versions_by_type[v] |
|
|
|
try: |
|
|
|
if v == "pad" or os.path.exists(vdata["path"]): |
|
|
|
p["link"] = absurl(vdata["url"], linkbase) |
|
|
|
break |
|
|
|
except KeyError as e: |
|
|
|
pass |
|
|
|
|
|
|
|
if args.output: |
|
|
|
with open(args.output, "w") as f: |
|
|
|