|
|
@ -163,19 +163,7 @@ def build_argument_parser(args): |
|
|
|
return parser |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main(args): |
|
|
|
p = build_argument_parser(args) |
|
|
|
args = p.parse_args(args) |
|
|
|
|
|
|
|
raw_ext = ".raw.txt" |
|
|
|
if args.no_raw_ext: |
|
|
|
raw_ext = "" |
|
|
|
|
|
|
|
info = loadpadinfo(args.padinfo) |
|
|
|
data = {} |
|
|
|
data['apikey'] = info['apikey'] |
|
|
|
|
|
|
|
def get_padids(args, info, data): |
|
|
|
if args.padid: |
|
|
|
padids = args.padid |
|
|
|
elif args.glob: |
|
|
@ -188,258 +176,208 @@ def main(args): |
|
|
|
info['localapiurl'] + 'listAllPads?' + urlencode(data) |
|
|
|
)['data']['padIDs'] |
|
|
|
padids.sort() |
|
|
|
numpads = len(padids) |
|
|
|
# maxmsglen = 0 |
|
|
|
count = 0 |
|
|
|
|
|
|
|
progress_kwargs = {} |
|
|
|
if not istty(): |
|
|
|
progress_kwargs.update(dict(disable=True)) |
|
|
|
progress_pads = tqdm(iterable=padids, total=len(padids), **progress_kwargs) |
|
|
|
return padids |
|
|
|
|
|
|
|
for i, padid in enumerate(progress_pads): |
|
|
|
if args.skip != None and i < args.skip: |
|
|
|
continue |
|
|
|
|
|
|
|
data['padID'] = padid |
|
|
|
p = padpath(padid, args.pub, args.group, args.fix_names) |
|
|
|
if args.folder: |
|
|
|
p = os.path.join(p, padid) |
|
|
|
def handle_pad(args, index, padid, data, info, raw_ext): |
|
|
|
if args.skip != None and index < args.skip: |
|
|
|
return |
|
|
|
|
|
|
|
metapath = p + ".meta.json" |
|
|
|
revisions = None |
|
|
|
tries = 1 |
|
|
|
skip = False |
|
|
|
padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"]) |
|
|
|
meta = {} |
|
|
|
data['padID'] = padid |
|
|
|
p = padpath(padid, args.pub, args.group, args.fix_names) |
|
|
|
if args.folder: |
|
|
|
p = os.path.join(p, padid) |
|
|
|
|
|
|
|
while True: |
|
|
|
try: |
|
|
|
if os.path.exists(metapath): |
|
|
|
with open(metapath) as f: |
|
|
|
meta.update(json.load(f)) |
|
|
|
revisions = getjson( |
|
|
|
info['localapiurl'] |
|
|
|
+ 'getRevisionsCount?' |
|
|
|
+ urlencode(data) |
|
|
|
)['data']['revisions'] |
|
|
|
if meta['revisions'] == revisions and not args.force: |
|
|
|
skip = True |
|
|
|
break |
|
|
|
|
|
|
|
meta['padid'] = padid |
|
|
|
versions = meta["versions"] = [] |
|
|
|
versions.append( |
|
|
|
{ |
|
|
|
"url": padurlbase + quote(padid), |
|
|
|
"type": "pad", |
|
|
|
"code": 200, |
|
|
|
} |
|
|
|
) |
|
|
|
|
|
|
|
if revisions == None: |
|
|
|
meta['revisions'] = getjson( |
|
|
|
info['localapiurl'] |
|
|
|
+ 'getRevisionsCount?' |
|
|
|
+ urlencode(data) |
|
|
|
)['data']['revisions'] |
|
|
|
else: |
|
|
|
meta['revisions'] = revisions |
|
|
|
metapath = p + ".meta.json" |
|
|
|
revisions = None |
|
|
|
tries = 1 |
|
|
|
skip = False |
|
|
|
padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"]) |
|
|
|
meta = {} |
|
|
|
|
|
|
|
if (meta['revisions'] == 0) and (not args.zerorevs): |
|
|
|
while True: |
|
|
|
try: |
|
|
|
if os.path.exists(metapath): |
|
|
|
with open(metapath) as f: |
|
|
|
meta.update(json.load(f)) |
|
|
|
revisions = getjson( |
|
|
|
info['localapiurl'] + 'getRevisionsCount?' + urlencode(data) |
|
|
|
)['data']['revisions'] |
|
|
|
if meta['revisions'] == revisions and not args.force: |
|
|
|
skip = True |
|
|
|
break |
|
|
|
|
|
|
|
# todo: load more metadata! |
|
|
|
meta['group'], meta['pad'] = splitpadname(padid) |
|
|
|
meta['pathbase'] = p |
|
|
|
meta['lastedited_raw'] = int( |
|
|
|
getjson( |
|
|
|
info['localapiurl'] + 'getLastEdited?' + urlencode(data) |
|
|
|
)['data']['lastEdited'] |
|
|
|
) |
|
|
|
meta['lastedited_iso'] = datetime.fromtimestamp( |
|
|
|
int(meta['lastedited_raw']) / 1000 |
|
|
|
).isoformat() |
|
|
|
meta['author_ids'] = getjson( |
|
|
|
info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data) |
|
|
|
)['data']['authorIDs'] |
|
|
|
meta['padid'] = padid |
|
|
|
versions = meta["versions"] = [] |
|
|
|
versions.append( |
|
|
|
{"url": padurlbase + quote(padid), "type": "pad", "code": 200,} |
|
|
|
) |
|
|
|
|
|
|
|
if revisions is None: |
|
|
|
meta['revisions'] = getjson( |
|
|
|
info['localapiurl'] + 'getRevisionsCount?' + urlencode(data) |
|
|
|
)['data']['revisions'] |
|
|
|
else: |
|
|
|
meta['revisions'] = revisions |
|
|
|
|
|
|
|
if (meta['revisions'] == 0) and (not args.zerorevs): |
|
|
|
skip = True |
|
|
|
break |
|
|
|
except HTTPError as e: |
|
|
|
tries += 1 |
|
|
|
if tries > 3: |
|
|
|
print( |
|
|
|
"Too many failures ({0}), skipping".format(padid), |
|
|
|
file=sys.stderr, |
|
|
|
) |
|
|
|
skip = True |
|
|
|
break |
|
|
|
else: |
|
|
|
sleep(3) |
|
|
|
except TypeError as e: |
|
|
|
|
|
|
|
# todo: load more metadata! |
|
|
|
meta['group'], meta['pad'] = splitpadname(padid) |
|
|
|
meta['pathbase'] = p |
|
|
|
meta['lastedited_raw'] = int( |
|
|
|
getjson( |
|
|
|
info['localapiurl'] + 'getLastEdited?' + urlencode(data) |
|
|
|
)['data']['lastEdited'] |
|
|
|
) |
|
|
|
meta['lastedited_iso'] = datetime.fromtimestamp( |
|
|
|
int(meta['lastedited_raw']) / 1000 |
|
|
|
).isoformat() |
|
|
|
meta['author_ids'] = getjson( |
|
|
|
info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data) |
|
|
|
)['data']['authorIDs'] |
|
|
|
break |
|
|
|
except HTTPError as e: |
|
|
|
tries += 1 |
|
|
|
if tries > 3: |
|
|
|
print( |
|
|
|
"Type Error loading pad {0} (phantom pad?), skipping".format( |
|
|
|
padid |
|
|
|
), |
|
|
|
"Too many failures ({0}), skipping".format(padid), |
|
|
|
file=sys.stderr, |
|
|
|
) |
|
|
|
skip = True |
|
|
|
break |
|
|
|
else: |
|
|
|
sleep(3) |
|
|
|
except TypeError as e: |
|
|
|
print( |
|
|
|
"Type Error loading pad {0} (phantom pad?), skipping".format( |
|
|
|
padid |
|
|
|
), |
|
|
|
file=sys.stderr, |
|
|
|
) |
|
|
|
skip = True |
|
|
|
break |
|
|
|
|
|
|
|
if skip: |
|
|
|
continue |
|
|
|
if skip: |
|
|
|
return |
|
|
|
|
|
|
|
count += 1 |
|
|
|
if args.output: |
|
|
|
print(padid) |
|
|
|
|
|
|
|
if args.output: |
|
|
|
print(padid) |
|
|
|
if args.all or (args.meta or args.text or args.html or args.dhtml): |
|
|
|
try: |
|
|
|
os.makedirs(os.path.split(metapath)[0]) |
|
|
|
except OSError: |
|
|
|
pass |
|
|
|
|
|
|
|
if args.all or (args.meta or args.text or args.html or args.dhtml): |
|
|
|
try: |
|
|
|
os.makedirs(os.path.split(metapath)[0]) |
|
|
|
except OSError: |
|
|
|
pass |
|
|
|
|
|
|
|
if args.all or args.text: |
|
|
|
text = getjson(info['localapiurl'] + 'getText?' + urlencode(data)) |
|
|
|
ver = {"type": "text"} |
|
|
|
versions.append(ver) |
|
|
|
ver["code"] = text["_code"] |
|
|
|
if text["_code"] == 200: |
|
|
|
text = text['data']['text'] |
|
|
|
|
|
|
|
########################################## |
|
|
|
## ENFORCE __NOPUBLISH__ MAGIC WORD |
|
|
|
########################################## |
|
|
|
if args.nopublish and args.nopublish in text: |
|
|
|
# NEED TO PURGE ANY EXISTING DOCS |
|
|
|
try_deleting( |
|
|
|
( |
|
|
|
p + raw_ext, |
|
|
|
p + ".raw.html", |
|
|
|
p + ".diff.html", |
|
|
|
p + ".meta.json", |
|
|
|
) |
|
|
|
if args.all or args.text: |
|
|
|
text = getjson(info['localapiurl'] + 'getText?' + urlencode(data)) |
|
|
|
ver = {"type": "text"} |
|
|
|
versions.append(ver) |
|
|
|
ver["code"] = text["_code"] |
|
|
|
if text["_code"] == 200: |
|
|
|
text = text['data']['text'] |
|
|
|
|
|
|
|
########################################## |
|
|
|
## ENFORCE __NOPUBLISH__ MAGIC WORD |
|
|
|
########################################## |
|
|
|
if args.nopublish and args.nopublish in text: |
|
|
|
# NEED TO PURGE ANY EXISTING DOCS |
|
|
|
try_deleting( |
|
|
|
( |
|
|
|
p + raw_ext, |
|
|
|
p + ".raw.html", |
|
|
|
p + ".diff.html", |
|
|
|
p + ".meta.json", |
|
|
|
) |
|
|
|
continue |
|
|
|
|
|
|
|
########################################## |
|
|
|
## ENFORCE __PUBLISH__ MAGIC WORD |
|
|
|
########################################## |
|
|
|
if args.publish_opt_in and args.publish not in text: |
|
|
|
try_deleting( |
|
|
|
( |
|
|
|
p + raw_ext, |
|
|
|
p + ".raw.html", |
|
|
|
p + ".diff.html", |
|
|
|
p + ".meta.json", |
|
|
|
) |
|
|
|
) |
|
|
|
return |
|
|
|
|
|
|
|
########################################## |
|
|
|
## ENFORCE __PUBLISH__ MAGIC WORD |
|
|
|
########################################## |
|
|
|
if args.publish_opt_in and args.publish not in text: |
|
|
|
try_deleting( |
|
|
|
( |
|
|
|
p + raw_ext, |
|
|
|
p + ".raw.html", |
|
|
|
p + ".diff.html", |
|
|
|
p + ".meta.json", |
|
|
|
) |
|
|
|
continue |
|
|
|
|
|
|
|
ver["path"] = p + raw_ext |
|
|
|
ver["url"] = quote(ver["path"]) |
|
|
|
with open(ver["path"], "w") as f: |
|
|
|
f.write(text) |
|
|
|
# once the content is settled, compute a hash |
|
|
|
# and link it in the metadata! |
|
|
|
|
|
|
|
links = [] |
|
|
|
if args.css: |
|
|
|
links.append({"href": args.css, "rel": "stylesheet"}) |
|
|
|
# todo, make this process reflect which files actually were made |
|
|
|
versionbaseurl = quote(padid) |
|
|
|
) |
|
|
|
return |
|
|
|
|
|
|
|
ver["path"] = p + raw_ext |
|
|
|
ver["url"] = quote(ver["path"]) |
|
|
|
with open(ver["path"], "w") as f: |
|
|
|
f.write(text) |
|
|
|
# once the content is settled, compute a hash |
|
|
|
# and link it in the metadata! |
|
|
|
|
|
|
|
links = [] |
|
|
|
if args.css: |
|
|
|
links.append({"href": args.css, "rel": "stylesheet"}) |
|
|
|
# todo, make this process reflect which files actually were made |
|
|
|
versionbaseurl = quote(padid) |
|
|
|
links.append( |
|
|
|
{ |
|
|
|
"href": versions[0]["url"], |
|
|
|
"rel": "alternate", |
|
|
|
"type": "text/html", |
|
|
|
"title": "Etherpad", |
|
|
|
} |
|
|
|
) |
|
|
|
if args.all or args.text: |
|
|
|
links.append( |
|
|
|
{ |
|
|
|
"href": versionbaseurl + raw_ext, |
|
|
|
"rel": "alternate", |
|
|
|
"type": "text/plain", |
|
|
|
"title": "Plain text", |
|
|
|
} |
|
|
|
) |
|
|
|
if args.all or args.html: |
|
|
|
links.append( |
|
|
|
{ |
|
|
|
"href": versions[0]["url"], |
|
|
|
"href": versionbaseurl + ".raw.html", |
|
|
|
"rel": "alternate", |
|
|
|
"type": "text/html", |
|
|
|
"title": "Etherpad", |
|
|
|
"title": "HTML", |
|
|
|
} |
|
|
|
) |
|
|
|
if args.all or args.dhtml: |
|
|
|
links.append( |
|
|
|
{ |
|
|
|
"href": versionbaseurl + ".diff.html", |
|
|
|
"rel": "alternate", |
|
|
|
"type": "text/html", |
|
|
|
"title": "HTML with author colors", |
|
|
|
} |
|
|
|
) |
|
|
|
if args.all or args.meta: |
|
|
|
links.append( |
|
|
|
{ |
|
|
|
"href": versionbaseurl + ".meta.json", |
|
|
|
"rel": "alternate", |
|
|
|
"type": "application/json", |
|
|
|
"title": "Meta data", |
|
|
|
} |
|
|
|
) |
|
|
|
if args.all or args.text: |
|
|
|
links.append( |
|
|
|
{ |
|
|
|
"href": versionbaseurl + raw_ext, |
|
|
|
"rel": "alternate", |
|
|
|
"type": "text/plain", |
|
|
|
"title": "Plain text", |
|
|
|
} |
|
|
|
) |
|
|
|
if args.all or args.html: |
|
|
|
links.append( |
|
|
|
{ |
|
|
|
"href": versionbaseurl + ".raw.html", |
|
|
|
"rel": "alternate", |
|
|
|
"type": "text/html", |
|
|
|
"title": "HTML", |
|
|
|
} |
|
|
|
) |
|
|
|
if args.all or args.dhtml: |
|
|
|
links.append( |
|
|
|
{ |
|
|
|
"href": versionbaseurl + ".diff.html", |
|
|
|
"rel": "alternate", |
|
|
|
"type": "text/html", |
|
|
|
"title": "HTML with author colors", |
|
|
|
} |
|
|
|
) |
|
|
|
if args.all or args.meta: |
|
|
|
links.append( |
|
|
|
{ |
|
|
|
"href": versionbaseurl + ".meta.json", |
|
|
|
"rel": "alternate", |
|
|
|
"type": "application/json", |
|
|
|
"title": "Meta data", |
|
|
|
} |
|
|
|
) |
|
|
|
|
|
|
|
# links.append({"href":"/", "rel":"search", "type":"text/html", "title":"Index"}) |
|
|
|
|
|
|
|
if args.all or args.dhtml: |
|
|
|
data['startRev'] = "0" |
|
|
|
html = getjson( |
|
|
|
info['localapiurl'] + 'createDiffHTML?' + urlencode(data) |
|
|
|
) |
|
|
|
ver = {"type": "diffhtml"} |
|
|
|
versions.append(ver) |
|
|
|
ver["code"] = html["_code"] |
|
|
|
if html["_code"] == 200: |
|
|
|
try: |
|
|
|
html = html['data']['html'] |
|
|
|
ver["path"] = p + ".diff.html" |
|
|
|
ver["url"] = quote(ver["path"]) |
|
|
|
# doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False) |
|
|
|
doc = html5lib.parse( |
|
|
|
html, treebuilder="etree", namespaceHTMLElements=False |
|
|
|
) |
|
|
|
html5tidy( |
|
|
|
doc, |
|
|
|
indent=True, |
|
|
|
title=padid, |
|
|
|
scripts=args.script, |
|
|
|
links=links, |
|
|
|
) |
|
|
|
with open(ver["path"], "w") as f: |
|
|
|
print( |
|
|
|
ET.tostring(doc, method="html", encoding="unicode"), |
|
|
|
file=f, |
|
|
|
) |
|
|
|
except TypeError: |
|
|
|
# Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file! |
|
|
|
ver["message"] = html["message"] |
|
|
|
# with open(ver["path"], "w") as f: |
|
|
|
# print ("""<pre>{0}</pre>""".format(json.dumps(html, indent=2)), file=f) |
|
|
|
|
|
|
|
# Process text, html, dhtml, all options |
|
|
|
if args.all or args.html: |
|
|
|
html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data)) |
|
|
|
ver = {"type": "html"} |
|
|
|
versions.append(ver) |
|
|
|
ver["code"] = html["_code"] |
|
|
|
if html["_code"] == 200: |
|
|
|
if args.all or args.dhtml: |
|
|
|
data['startRev'] = "0" |
|
|
|
html = getjson( |
|
|
|
info['localapiurl'] + 'createDiffHTML?' + urlencode(data) |
|
|
|
) |
|
|
|
ver = {"type": "diffhtml"} |
|
|
|
versions.append(ver) |
|
|
|
ver["code"] = html["_code"] |
|
|
|
if html["_code"] == 200: |
|
|
|
try: |
|
|
|
html = html['data']['html'] |
|
|
|
ver["path"] = p + ".raw.html" |
|
|
|
ver["path"] = p + ".diff.html" |
|
|
|
ver["url"] = quote(ver["path"]) |
|
|
|
doc = html5lib.parse( |
|
|
|
html, treebuilder="etree", namespaceHTMLElements=False |
|
|
@ -456,12 +394,58 @@ def main(args): |
|
|
|
ET.tostring(doc, method="html", encoding="unicode"), |
|
|
|
file=f, |
|
|
|
) |
|
|
|
except TypeError: |
|
|
|
ver["message"] = html["message"] |
|
|
|
|
|
|
|
# Process text, html, dhtml, all options |
|
|
|
if args.all or args.html: |
|
|
|
html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data)) |
|
|
|
ver = {"type": "html"} |
|
|
|
versions.append(ver) |
|
|
|
ver["code"] = html["_code"] |
|
|
|
if html["_code"] == 200: |
|
|
|
html = html['data']['html'] |
|
|
|
ver["path"] = p + ".raw.html" |
|
|
|
ver["url"] = quote(ver["path"]) |
|
|
|
doc = html5lib.parse( |
|
|
|
html, treebuilder="etree", namespaceHTMLElements=False |
|
|
|
) |
|
|
|
html5tidy( |
|
|
|
doc, indent=True, title=padid, scripts=args.script, links=links, |
|
|
|
) |
|
|
|
with open(ver["path"], "w") as f: |
|
|
|
print( |
|
|
|
ET.tostring(doc, method="html", encoding="unicode"), file=f, |
|
|
|
) |
|
|
|
|
|
|
|
# output meta |
|
|
|
if args.all or args.meta: |
|
|
|
ver = {"type": "meta"} |
|
|
|
versions.append(ver) |
|
|
|
ver["path"] = metapath |
|
|
|
ver["url"] = quote(metapath) |
|
|
|
with open(metapath, "w") as f: |
|
|
|
json.dump(meta, f, indent=2) |
|
|
|
|
|
|
|
|
|
|
|
def main(args): |
|
|
|
p = build_argument_parser(args) |
|
|
|
args = p.parse_args(args) |
|
|
|
|
|
|
|
raw_ext = ".raw.txt" |
|
|
|
if args.no_raw_ext: |
|
|
|
raw_ext = "" |
|
|
|
|
|
|
|
info = loadpadinfo(args.padinfo) |
|
|
|
data = {} |
|
|
|
data['apikey'] = info['apikey'] |
|
|
|
|
|
|
|
padids = get_padids(args, info, data) |
|
|
|
|
|
|
|
progress_kwargs = {} |
|
|
|
if not istty(): |
|
|
|
progress_kwargs.update(dict(disable=True)) |
|
|
|
progress_pads = tqdm(iterable=padids, total=len(padids), **progress_kwargs) |
|
|
|
|
|
|
|
# output meta |
|
|
|
if args.all or args.meta: |
|
|
|
ver = {"type": "meta"} |
|
|
|
versions.append(ver) |
|
|
|
ver["path"] = metapath |
|
|
|
ver["url"] = quote(metapath) |
|
|
|
with open(metapath, "w") as f: |
|
|
|
json.dump(meta, f, indent=2) |
|
|
|
for index, padid in enumerate(progress_pads): |
|
|
|
handle_pad(args, index, padid, data, info, raw_ext) |
|
|
|