Move pad pulling into own function
This commit is contained in:
parent
97bcca145b
commit
b82f39a42d
@ -163,19 +163,7 @@ def build_argument_parser(args):
|
|||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
def get_padids(args, info, data):
|
||||||
def main(args):
|
|
||||||
p = build_argument_parser(args)
|
|
||||||
args = p.parse_args(args)
|
|
||||||
|
|
||||||
raw_ext = ".raw.txt"
|
|
||||||
if args.no_raw_ext:
|
|
||||||
raw_ext = ""
|
|
||||||
|
|
||||||
info = loadpadinfo(args.padinfo)
|
|
||||||
data = {}
|
|
||||||
data['apikey'] = info['apikey']
|
|
||||||
|
|
||||||
if args.padid:
|
if args.padid:
|
||||||
padids = args.padid
|
padids = args.padid
|
||||||
elif args.glob:
|
elif args.glob:
|
||||||
@ -188,258 +176,208 @@ def main(args):
|
|||||||
info['localapiurl'] + 'listAllPads?' + urlencode(data)
|
info['localapiurl'] + 'listAllPads?' + urlencode(data)
|
||||||
)['data']['padIDs']
|
)['data']['padIDs']
|
||||||
padids.sort()
|
padids.sort()
|
||||||
numpads = len(padids)
|
return padids
|
||||||
# maxmsglen = 0
|
|
||||||
count = 0
|
|
||||||
|
|
||||||
progress_kwargs = {}
|
|
||||||
if not istty():
|
|
||||||
progress_kwargs.update(dict(disable=True))
|
|
||||||
progress_pads = tqdm(iterable=padids, total=len(padids), **progress_kwargs)
|
|
||||||
|
|
||||||
for i, padid in enumerate(progress_pads):
|
def handle_pad(args, index, padid, data, info, raw_ext):
|
||||||
if args.skip != None and i < args.skip:
|
if args.skip != None and index < args.skip:
|
||||||
continue
|
return
|
||||||
|
|
||||||
data['padID'] = padid
|
data['padID'] = padid
|
||||||
p = padpath(padid, args.pub, args.group, args.fix_names)
|
p = padpath(padid, args.pub, args.group, args.fix_names)
|
||||||
if args.folder:
|
if args.folder:
|
||||||
p = os.path.join(p, padid)
|
p = os.path.join(p, padid)
|
||||||
|
|
||||||
metapath = p + ".meta.json"
|
metapath = p + ".meta.json"
|
||||||
revisions = None
|
revisions = None
|
||||||
tries = 1
|
tries = 1
|
||||||
skip = False
|
skip = False
|
||||||
padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
|
padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
|
||||||
meta = {}
|
meta = {}
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
if os.path.exists(metapath):
|
if os.path.exists(metapath):
|
||||||
with open(metapath) as f:
|
with open(metapath) as f:
|
||||||
meta.update(json.load(f))
|
meta.update(json.load(f))
|
||||||
revisions = getjson(
|
revisions = getjson(
|
||||||
info['localapiurl']
|
info['localapiurl'] + 'getRevisionsCount?' + urlencode(data)
|
||||||
+ 'getRevisionsCount?'
|
)['data']['revisions']
|
||||||
+ urlencode(data)
|
if meta['revisions'] == revisions and not args.force:
|
||||||
)['data']['revisions']
|
|
||||||
if meta['revisions'] == revisions and not args.force:
|
|
||||||
skip = True
|
|
||||||
break
|
|
||||||
|
|
||||||
meta['padid'] = padid
|
|
||||||
versions = meta["versions"] = []
|
|
||||||
versions.append(
|
|
||||||
{
|
|
||||||
"url": padurlbase + quote(padid),
|
|
||||||
"type": "pad",
|
|
||||||
"code": 200,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
if revisions == None:
|
|
||||||
meta['revisions'] = getjson(
|
|
||||||
info['localapiurl']
|
|
||||||
+ 'getRevisionsCount?'
|
|
||||||
+ urlencode(data)
|
|
||||||
)['data']['revisions']
|
|
||||||
else:
|
|
||||||
meta['revisions'] = revisions
|
|
||||||
|
|
||||||
if (meta['revisions'] == 0) and (not args.zerorevs):
|
|
||||||
skip = True
|
skip = True
|
||||||
break
|
break
|
||||||
|
|
||||||
# todo: load more metadata!
|
meta['padid'] = padid
|
||||||
meta['group'], meta['pad'] = splitpadname(padid)
|
versions = meta["versions"] = []
|
||||||
meta['pathbase'] = p
|
versions.append(
|
||||||
meta['lastedited_raw'] = int(
|
{"url": padurlbase + quote(padid), "type": "pad", "code": 200,}
|
||||||
getjson(
|
)
|
||||||
info['localapiurl'] + 'getLastEdited?' + urlencode(data)
|
|
||||||
)['data']['lastEdited']
|
if revisions is None:
|
||||||
)
|
meta['revisions'] = getjson(
|
||||||
meta['lastedited_iso'] = datetime.fromtimestamp(
|
info['localapiurl'] + 'getRevisionsCount?' + urlencode(data)
|
||||||
int(meta['lastedited_raw']) / 1000
|
)['data']['revisions']
|
||||||
).isoformat()
|
else:
|
||||||
meta['author_ids'] = getjson(
|
meta['revisions'] = revisions
|
||||||
info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
|
|
||||||
)['data']['authorIDs']
|
if (meta['revisions'] == 0) and (not args.zerorevs):
|
||||||
|
skip = True
|
||||||
break
|
break
|
||||||
except HTTPError as e:
|
|
||||||
tries += 1
|
# todo: load more metadata!
|
||||||
if tries > 3:
|
meta['group'], meta['pad'] = splitpadname(padid)
|
||||||
print(
|
meta['pathbase'] = p
|
||||||
"Too many failures ({0}), skipping".format(padid),
|
meta['lastedited_raw'] = int(
|
||||||
file=sys.stderr,
|
getjson(
|
||||||
)
|
info['localapiurl'] + 'getLastEdited?' + urlencode(data)
|
||||||
skip = True
|
)['data']['lastEdited']
|
||||||
break
|
)
|
||||||
else:
|
meta['lastedited_iso'] = datetime.fromtimestamp(
|
||||||
sleep(3)
|
int(meta['lastedited_raw']) / 1000
|
||||||
except TypeError as e:
|
).isoformat()
|
||||||
|
meta['author_ids'] = getjson(
|
||||||
|
info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
|
||||||
|
)['data']['authorIDs']
|
||||||
|
break
|
||||||
|
except HTTPError as e:
|
||||||
|
tries += 1
|
||||||
|
if tries > 3:
|
||||||
print(
|
print(
|
||||||
"Type Error loading pad {0} (phantom pad?), skipping".format(
|
"Too many failures ({0}), skipping".format(padid),
|
||||||
padid
|
|
||||||
),
|
|
||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
)
|
)
|
||||||
skip = True
|
skip = True
|
||||||
break
|
break
|
||||||
|
else:
|
||||||
|
sleep(3)
|
||||||
|
except TypeError as e:
|
||||||
|
print(
|
||||||
|
"Type Error loading pad {0} (phantom pad?), skipping".format(
|
||||||
|
padid
|
||||||
|
),
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
skip = True
|
||||||
|
break
|
||||||
|
|
||||||
if skip:
|
if skip:
|
||||||
continue
|
return
|
||||||
|
|
||||||
count += 1
|
if args.output:
|
||||||
|
print(padid)
|
||||||
|
|
||||||
if args.output:
|
if args.all or (args.meta or args.text or args.html or args.dhtml):
|
||||||
print(padid)
|
try:
|
||||||
|
os.makedirs(os.path.split(metapath)[0])
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
if args.all or (args.meta or args.text or args.html or args.dhtml):
|
if args.all or args.text:
|
||||||
try:
|
text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))
|
||||||
os.makedirs(os.path.split(metapath)[0])
|
ver = {"type": "text"}
|
||||||
except OSError:
|
versions.append(ver)
|
||||||
pass
|
ver["code"] = text["_code"]
|
||||||
|
if text["_code"] == 200:
|
||||||
|
text = text['data']['text']
|
||||||
|
|
||||||
if args.all or args.text:
|
##########################################
|
||||||
text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))
|
## ENFORCE __NOPUBLISH__ MAGIC WORD
|
||||||
ver = {"type": "text"}
|
##########################################
|
||||||
versions.append(ver)
|
if args.nopublish and args.nopublish in text:
|
||||||
ver["code"] = text["_code"]
|
# NEED TO PURGE ANY EXISTING DOCS
|
||||||
if text["_code"] == 200:
|
try_deleting(
|
||||||
text = text['data']['text']
|
(
|
||||||
|
p + raw_ext,
|
||||||
##########################################
|
p + ".raw.html",
|
||||||
## ENFORCE __NOPUBLISH__ MAGIC WORD
|
p + ".diff.html",
|
||||||
##########################################
|
p + ".meta.json",
|
||||||
if args.nopublish and args.nopublish in text:
|
|
||||||
# NEED TO PURGE ANY EXISTING DOCS
|
|
||||||
try_deleting(
|
|
||||||
(
|
|
||||||
p + raw_ext,
|
|
||||||
p + ".raw.html",
|
|
||||||
p + ".diff.html",
|
|
||||||
p + ".meta.json",
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
continue
|
)
|
||||||
|
return
|
||||||
|
|
||||||
##########################################
|
##########################################
|
||||||
## ENFORCE __PUBLISH__ MAGIC WORD
|
## ENFORCE __PUBLISH__ MAGIC WORD
|
||||||
##########################################
|
##########################################
|
||||||
if args.publish_opt_in and args.publish not in text:
|
if args.publish_opt_in and args.publish not in text:
|
||||||
try_deleting(
|
try_deleting(
|
||||||
(
|
(
|
||||||
p + raw_ext,
|
p + raw_ext,
|
||||||
p + ".raw.html",
|
p + ".raw.html",
|
||||||
p + ".diff.html",
|
p + ".diff.html",
|
||||||
p + ".meta.json",
|
p + ".meta.json",
|
||||||
)
|
|
||||||
)
|
)
|
||||||
continue
|
)
|
||||||
|
return
|
||||||
|
|
||||||
ver["path"] = p + raw_ext
|
ver["path"] = p + raw_ext
|
||||||
ver["url"] = quote(ver["path"])
|
ver["url"] = quote(ver["path"])
|
||||||
with open(ver["path"], "w") as f:
|
with open(ver["path"], "w") as f:
|
||||||
f.write(text)
|
f.write(text)
|
||||||
# once the content is settled, compute a hash
|
# once the content is settled, compute a hash
|
||||||
# and link it in the metadata!
|
# and link it in the metadata!
|
||||||
|
|
||||||
links = []
|
links = []
|
||||||
if args.css:
|
if args.css:
|
||||||
links.append({"href": args.css, "rel": "stylesheet"})
|
links.append({"href": args.css, "rel": "stylesheet"})
|
||||||
# todo, make this process reflect which files actually were made
|
# todo, make this process reflect which files actually were made
|
||||||
versionbaseurl = quote(padid)
|
versionbaseurl = quote(padid)
|
||||||
|
links.append(
|
||||||
|
{
|
||||||
|
"href": versions[0]["url"],
|
||||||
|
"rel": "alternate",
|
||||||
|
"type": "text/html",
|
||||||
|
"title": "Etherpad",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
if args.all or args.text:
|
||||||
links.append(
|
links.append(
|
||||||
{
|
{
|
||||||
"href": versions[0]["url"],
|
"href": versionbaseurl + raw_ext,
|
||||||
"rel": "alternate",
|
"rel": "alternate",
|
||||||
"type": "text/html",
|
"type": "text/plain",
|
||||||
"title": "Etherpad",
|
"title": "Plain text",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
if args.all or args.html:
|
||||||
|
links.append(
|
||||||
|
{
|
||||||
|
"href": versionbaseurl + ".raw.html",
|
||||||
|
"rel": "alternate",
|
||||||
|
"type": "text/html",
|
||||||
|
"title": "HTML",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
if args.all or args.dhtml:
|
||||||
|
links.append(
|
||||||
|
{
|
||||||
|
"href": versionbaseurl + ".diff.html",
|
||||||
|
"rel": "alternate",
|
||||||
|
"type": "text/html",
|
||||||
|
"title": "HTML with author colors",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
if args.all or args.meta:
|
||||||
|
links.append(
|
||||||
|
{
|
||||||
|
"href": versionbaseurl + ".meta.json",
|
||||||
|
"rel": "alternate",
|
||||||
|
"type": "application/json",
|
||||||
|
"title": "Meta data",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
if args.all or args.text:
|
|
||||||
links.append(
|
|
||||||
{
|
|
||||||
"href": versionbaseurl + raw_ext,
|
|
||||||
"rel": "alternate",
|
|
||||||
"type": "text/plain",
|
|
||||||
"title": "Plain text",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
if args.all or args.html:
|
|
||||||
links.append(
|
|
||||||
{
|
|
||||||
"href": versionbaseurl + ".raw.html",
|
|
||||||
"rel": "alternate",
|
|
||||||
"type": "text/html",
|
|
||||||
"title": "HTML",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
if args.all or args.dhtml:
|
|
||||||
links.append(
|
|
||||||
{
|
|
||||||
"href": versionbaseurl + ".diff.html",
|
|
||||||
"rel": "alternate",
|
|
||||||
"type": "text/html",
|
|
||||||
"title": "HTML with author colors",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
if args.all or args.meta:
|
|
||||||
links.append(
|
|
||||||
{
|
|
||||||
"href": versionbaseurl + ".meta.json",
|
|
||||||
"rel": "alternate",
|
|
||||||
"type": "application/json",
|
|
||||||
"title": "Meta data",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# links.append({"href":"/", "rel":"search", "type":"text/html", "title":"Index"})
|
if args.all or args.dhtml:
|
||||||
|
data['startRev'] = "0"
|
||||||
if args.all or args.dhtml:
|
html = getjson(
|
||||||
data['startRev'] = "0"
|
info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
|
||||||
html = getjson(
|
)
|
||||||
info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
|
ver = {"type": "diffhtml"}
|
||||||
)
|
versions.append(ver)
|
||||||
ver = {"type": "diffhtml"}
|
ver["code"] = html["_code"]
|
||||||
versions.append(ver)
|
if html["_code"] == 200:
|
||||||
ver["code"] = html["_code"]
|
try:
|
||||||
if html["_code"] == 200:
|
|
||||||
try:
|
|
||||||
html = html['data']['html']
|
|
||||||
ver["path"] = p + ".diff.html"
|
|
||||||
ver["url"] = quote(ver["path"])
|
|
||||||
# doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
|
|
||||||
doc = html5lib.parse(
|
|
||||||
html, treebuilder="etree", namespaceHTMLElements=False
|
|
||||||
)
|
|
||||||
html5tidy(
|
|
||||||
doc,
|
|
||||||
indent=True,
|
|
||||||
title=padid,
|
|
||||||
scripts=args.script,
|
|
||||||
links=links,
|
|
||||||
)
|
|
||||||
with open(ver["path"], "w") as f:
|
|
||||||
print(
|
|
||||||
ET.tostring(doc, method="html", encoding="unicode"),
|
|
||||||
file=f,
|
|
||||||
)
|
|
||||||
except TypeError:
|
|
||||||
# Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file!
|
|
||||||
ver["message"] = html["message"]
|
|
||||||
# with open(ver["path"], "w") as f:
|
|
||||||
# print ("""<pre>{0}</pre>""".format(json.dumps(html, indent=2)), file=f)
|
|
||||||
|
|
||||||
# Process text, html, dhtml, all options
|
|
||||||
if args.all or args.html:
|
|
||||||
html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))
|
|
||||||
ver = {"type": "html"}
|
|
||||||
versions.append(ver)
|
|
||||||
ver["code"] = html["_code"]
|
|
||||||
if html["_code"] == 200:
|
|
||||||
html = html['data']['html']
|
html = html['data']['html']
|
||||||
ver["path"] = p + ".raw.html"
|
ver["path"] = p + ".diff.html"
|
||||||
ver["url"] = quote(ver["path"])
|
ver["url"] = quote(ver["path"])
|
||||||
doc = html5lib.parse(
|
doc = html5lib.parse(
|
||||||
html, treebuilder="etree", namespaceHTMLElements=False
|
html, treebuilder="etree", namespaceHTMLElements=False
|
||||||
@ -456,12 +394,58 @@ def main(args):
|
|||||||
ET.tostring(doc, method="html", encoding="unicode"),
|
ET.tostring(doc, method="html", encoding="unicode"),
|
||||||
file=f,
|
file=f,
|
||||||
)
|
)
|
||||||
|
except TypeError:
|
||||||
|
ver["message"] = html["message"]
|
||||||
|
|
||||||
# output meta
|
# Process text, html, dhtml, all options
|
||||||
if args.all or args.meta:
|
if args.all or args.html:
|
||||||
ver = {"type": "meta"}
|
html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))
|
||||||
versions.append(ver)
|
ver = {"type": "html"}
|
||||||
ver["path"] = metapath
|
versions.append(ver)
|
||||||
ver["url"] = quote(metapath)
|
ver["code"] = html["_code"]
|
||||||
with open(metapath, "w") as f:
|
if html["_code"] == 200:
|
||||||
json.dump(meta, f, indent=2)
|
html = html['data']['html']
|
||||||
|
ver["path"] = p + ".raw.html"
|
||||||
|
ver["url"] = quote(ver["path"])
|
||||||
|
doc = html5lib.parse(
|
||||||
|
html, treebuilder="etree", namespaceHTMLElements=False
|
||||||
|
)
|
||||||
|
html5tidy(
|
||||||
|
doc, indent=True, title=padid, scripts=args.script, links=links,
|
||||||
|
)
|
||||||
|
with open(ver["path"], "w") as f:
|
||||||
|
print(
|
||||||
|
ET.tostring(doc, method="html", encoding="unicode"), file=f,
|
||||||
|
)
|
||||||
|
|
||||||
|
# output meta
|
||||||
|
if args.all or args.meta:
|
||||||
|
ver = {"type": "meta"}
|
||||||
|
versions.append(ver)
|
||||||
|
ver["path"] = metapath
|
||||||
|
ver["url"] = quote(metapath)
|
||||||
|
with open(metapath, "w") as f:
|
||||||
|
json.dump(meta, f, indent=2)
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
|
||||||
|
p = build_argument_parser(args)
|
||||||
|
args = p.parse_args(args)
|
||||||
|
|
||||||
|
raw_ext = ".raw.txt"
|
||||||
|
if args.no_raw_ext:
|
||||||
|
raw_ext = ""
|
||||||
|
|
||||||
|
info = loadpadinfo(args.padinfo)
|
||||||
|
data = {}
|
||||||
|
data['apikey'] = info['apikey']
|
||||||
|
|
||||||
|
padids = get_padids(args, info, data)
|
||||||
|
|
||||||
|
progress_kwargs = {}
|
||||||
|
if not istty():
|
||||||
|
progress_kwargs.update(dict(disable=True))
|
||||||
|
progress_pads = tqdm(iterable=padids, total=len(padids), **progress_kwargs)
|
||||||
|
|
||||||
|
for index, padid in enumerate(progress_pads):
|
||||||
|
handle_pad(args, index, padid, data, info, raw_ext)
|
||||||
|
Loading…
Reference in New Issue
Block a user