Move pad pulling into own function

This commit is contained in:
Luke Murphy 2020-01-19 00:03:00 +01:00
parent 97bcca145b
commit b82f39a42d
No known key found for this signature in database
GPG Key ID: 5E2EF5A63E3718CC

View File

@ -163,19 +163,7 @@ def build_argument_parser(args):
return parser
def main(args):
p = build_argument_parser(args)
args = p.parse_args(args)
raw_ext = ".raw.txt"
if args.no_raw_ext:
raw_ext = ""
info = loadpadinfo(args.padinfo)
data = {}
data['apikey'] = info['apikey']
def get_padids(args, info, data):
if args.padid:
padids = args.padid
elif args.glob:
@ -188,258 +176,208 @@ def main(args):
info['localapiurl'] + 'listAllPads?' + urlencode(data)
)['data']['padIDs']
padids.sort()
numpads = len(padids)
# maxmsglen = 0
count = 0
return padids
progress_kwargs = {}
if not istty():
progress_kwargs.update(dict(disable=True))
progress_pads = tqdm(iterable=padids, total=len(padids), **progress_kwargs)
for i, padid in enumerate(progress_pads):
if args.skip != None and i < args.skip:
continue
def handle_pad(args, index, padid, data, info, raw_ext):
if args.skip != None and index < args.skip:
return
data['padID'] = padid
p = padpath(padid, args.pub, args.group, args.fix_names)
if args.folder:
p = os.path.join(p, padid)
data['padID'] = padid
p = padpath(padid, args.pub, args.group, args.fix_names)
if args.folder:
p = os.path.join(p, padid)
metapath = p + ".meta.json"
revisions = None
tries = 1
skip = False
padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
meta = {}
metapath = p + ".meta.json"
revisions = None
tries = 1
skip = False
padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
meta = {}
while True:
try:
if os.path.exists(metapath):
with open(metapath) as f:
meta.update(json.load(f))
revisions = getjson(
info['localapiurl']
+ 'getRevisionsCount?'
+ urlencode(data)
)['data']['revisions']
if meta['revisions'] == revisions and not args.force:
skip = True
break
meta['padid'] = padid
versions = meta["versions"] = []
versions.append(
{
"url": padurlbase + quote(padid),
"type": "pad",
"code": 200,
}
)
if revisions == None:
meta['revisions'] = getjson(
info['localapiurl']
+ 'getRevisionsCount?'
+ urlencode(data)
)['data']['revisions']
else:
meta['revisions'] = revisions
if (meta['revisions'] == 0) and (not args.zerorevs):
while True:
try:
if os.path.exists(metapath):
with open(metapath) as f:
meta.update(json.load(f))
revisions = getjson(
info['localapiurl'] + 'getRevisionsCount?' + urlencode(data)
)['data']['revisions']
if meta['revisions'] == revisions and not args.force:
skip = True
break
# todo: load more metadata!
meta['group'], meta['pad'] = splitpadname(padid)
meta['pathbase'] = p
meta['lastedited_raw'] = int(
getjson(
info['localapiurl'] + 'getLastEdited?' + urlencode(data)
)['data']['lastEdited']
)
meta['lastedited_iso'] = datetime.fromtimestamp(
int(meta['lastedited_raw']) / 1000
).isoformat()
meta['author_ids'] = getjson(
info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
)['data']['authorIDs']
meta['padid'] = padid
versions = meta["versions"] = []
versions.append(
{"url": padurlbase + quote(padid), "type": "pad", "code": 200,}
)
if revisions is None:
meta['revisions'] = getjson(
info['localapiurl'] + 'getRevisionsCount?' + urlencode(data)
)['data']['revisions']
else:
meta['revisions'] = revisions
if (meta['revisions'] == 0) and (not args.zerorevs):
skip = True
break
except HTTPError as e:
tries += 1
if tries > 3:
print(
"Too many failures ({0}), skipping".format(padid),
file=sys.stderr,
)
skip = True
break
else:
sleep(3)
except TypeError as e:
# todo: load more metadata!
meta['group'], meta['pad'] = splitpadname(padid)
meta['pathbase'] = p
meta['lastedited_raw'] = int(
getjson(
info['localapiurl'] + 'getLastEdited?' + urlencode(data)
)['data']['lastEdited']
)
meta['lastedited_iso'] = datetime.fromtimestamp(
int(meta['lastedited_raw']) / 1000
).isoformat()
meta['author_ids'] = getjson(
info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)
)['data']['authorIDs']
break
except HTTPError as e:
tries += 1
if tries > 3:
print(
"Type Error loading pad {0} (phantom pad?), skipping".format(
padid
),
"Too many failures ({0}), skipping".format(padid),
file=sys.stderr,
)
skip = True
break
else:
sleep(3)
except TypeError as e:
print(
"Type Error loading pad {0} (phantom pad?), skipping".format(
padid
),
file=sys.stderr,
)
skip = True
break
if skip:
continue
if skip:
return
count += 1
if args.output:
print(padid)
if args.output:
print(padid)
if args.all or (args.meta or args.text or args.html or args.dhtml):
try:
os.makedirs(os.path.split(metapath)[0])
except OSError:
pass
if args.all or (args.meta or args.text or args.html or args.dhtml):
try:
os.makedirs(os.path.split(metapath)[0])
except OSError:
pass
if args.all or args.text:
text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))
ver = {"type": "text"}
versions.append(ver)
ver["code"] = text["_code"]
if text["_code"] == 200:
text = text['data']['text']
if args.all or args.text:
text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))
ver = {"type": "text"}
versions.append(ver)
ver["code"] = text["_code"]
if text["_code"] == 200:
text = text['data']['text']
##########################################
## ENFORCE __NOPUBLISH__ MAGIC WORD
##########################################
if args.nopublish and args.nopublish in text:
# NEED TO PURGE ANY EXISTING DOCS
try_deleting(
(
p + raw_ext,
p + ".raw.html",
p + ".diff.html",
p + ".meta.json",
)
##########################################
## ENFORCE __NOPUBLISH__ MAGIC WORD
##########################################
if args.nopublish and args.nopublish in text:
# NEED TO PURGE ANY EXISTING DOCS
try_deleting(
(
p + raw_ext,
p + ".raw.html",
p + ".diff.html",
p + ".meta.json",
)
continue
)
return
##########################################
## ENFORCE __PUBLISH__ MAGIC WORD
##########################################
if args.publish_opt_in and args.publish not in text:
try_deleting(
(
p + raw_ext,
p + ".raw.html",
p + ".diff.html",
p + ".meta.json",
)
##########################################
## ENFORCE __PUBLISH__ MAGIC WORD
##########################################
if args.publish_opt_in and args.publish not in text:
try_deleting(
(
p + raw_ext,
p + ".raw.html",
p + ".diff.html",
p + ".meta.json",
)
continue
)
return
ver["path"] = p + raw_ext
ver["url"] = quote(ver["path"])
with open(ver["path"], "w") as f:
f.write(text)
# once the content is settled, compute a hash
# and link it in the metadata!
ver["path"] = p + raw_ext
ver["url"] = quote(ver["path"])
with open(ver["path"], "w") as f:
f.write(text)
# once the content is settled, compute a hash
# and link it in the metadata!
links = []
if args.css:
links.append({"href": args.css, "rel": "stylesheet"})
# todo, make this process reflect which files actually were made
versionbaseurl = quote(padid)
links = []
if args.css:
links.append({"href": args.css, "rel": "stylesheet"})
# todo, make this process reflect which files actually were made
versionbaseurl = quote(padid)
links.append(
{
"href": versions[0]["url"],
"rel": "alternate",
"type": "text/html",
"title": "Etherpad",
}
)
if args.all or args.text:
links.append(
{
"href": versions[0]["url"],
"href": versionbaseurl + raw_ext,
"rel": "alternate",
"type": "text/html",
"title": "Etherpad",
"type": "text/plain",
"title": "Plain text",
}
)
if args.all or args.html:
links.append(
{
"href": versionbaseurl + ".raw.html",
"rel": "alternate",
"type": "text/html",
"title": "HTML",
}
)
if args.all or args.dhtml:
links.append(
{
"href": versionbaseurl + ".diff.html",
"rel": "alternate",
"type": "text/html",
"title": "HTML with author colors",
}
)
if args.all or args.meta:
links.append(
{
"href": versionbaseurl + ".meta.json",
"rel": "alternate",
"type": "application/json",
"title": "Meta data",
}
)
if args.all or args.text:
links.append(
{
"href": versionbaseurl + raw_ext,
"rel": "alternate",
"type": "text/plain",
"title": "Plain text",
}
)
if args.all or args.html:
links.append(
{
"href": versionbaseurl + ".raw.html",
"rel": "alternate",
"type": "text/html",
"title": "HTML",
}
)
if args.all or args.dhtml:
links.append(
{
"href": versionbaseurl + ".diff.html",
"rel": "alternate",
"type": "text/html",
"title": "HTML with author colors",
}
)
if args.all or args.meta:
links.append(
{
"href": versionbaseurl + ".meta.json",
"rel": "alternate",
"type": "application/json",
"title": "Meta data",
}
)
# links.append({"href":"/", "rel":"search", "type":"text/html", "title":"Index"})
if args.all or args.dhtml:
data['startRev'] = "0"
html = getjson(
info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
)
ver = {"type": "diffhtml"}
versions.append(ver)
ver["code"] = html["_code"]
if html["_code"] == 200:
try:
html = html['data']['html']
ver["path"] = p + ".diff.html"
ver["url"] = quote(ver["path"])
# doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
doc = html5lib.parse(
html, treebuilder="etree", namespaceHTMLElements=False
)
html5tidy(
doc,
indent=True,
title=padid,
scripts=args.script,
links=links,
)
with open(ver["path"], "w") as f:
print(
ET.tostring(doc, method="html", encoding="unicode"),
file=f,
)
except TypeError:
# Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file!
ver["message"] = html["message"]
# with open(ver["path"], "w") as f:
# print ("""<pre>{0}</pre>""".format(json.dumps(html, indent=2)), file=f)
# Process text, html, dhtml, all options
if args.all or args.html:
html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))
ver = {"type": "html"}
versions.append(ver)
ver["code"] = html["_code"]
if html["_code"] == 200:
if args.all or args.dhtml:
data['startRev'] = "0"
html = getjson(
info['localapiurl'] + 'createDiffHTML?' + urlencode(data)
)
ver = {"type": "diffhtml"}
versions.append(ver)
ver["code"] = html["_code"]
if html["_code"] == 200:
try:
html = html['data']['html']
ver["path"] = p + ".raw.html"
ver["path"] = p + ".diff.html"
ver["url"] = quote(ver["path"])
doc = html5lib.parse(
html, treebuilder="etree", namespaceHTMLElements=False
@ -456,12 +394,58 @@ def main(args):
ET.tostring(doc, method="html", encoding="unicode"),
file=f,
)
except TypeError:
ver["message"] = html["message"]
# output meta
if args.all or args.meta:
ver = {"type": "meta"}
versions.append(ver)
ver["path"] = metapath
ver["url"] = quote(metapath)
with open(metapath, "w") as f:
json.dump(meta, f, indent=2)
# Process text, html, dhtml, all options
if args.all or args.html:
html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))
ver = {"type": "html"}
versions.append(ver)
ver["code"] = html["_code"]
if html["_code"] == 200:
html = html['data']['html']
ver["path"] = p + ".raw.html"
ver["url"] = quote(ver["path"])
doc = html5lib.parse(
html, treebuilder="etree", namespaceHTMLElements=False
)
html5tidy(
doc, indent=True, title=padid, scripts=args.script, links=links,
)
with open(ver["path"], "w") as f:
print(
ET.tostring(doc, method="html", encoding="unicode"), file=f,
)
# output meta
if args.all or args.meta:
ver = {"type": "meta"}
versions.append(ver)
ver["path"] = metapath
ver["url"] = quote(metapath)
with open(metapath, "w") as f:
json.dump(meta, f, indent=2)
def main(args):
    """Entry point: parse the arguments, list the pads, and handle each one.

    ``args`` is the raw argument list handed to the parser returned by
    ``build_argument_parser``. Each pad id returned by ``get_padids`` is
    passed, together with its position in the list, to ``handle_pad``.
    """
    parser = build_argument_parser(args)
    args = parser.parse_args(args)

    # Suffix used for the plain-text dump; empty when --no-raw-ext is set.
    raw_ext = "" if args.no_raw_ext else ".raw.txt"

    info = loadpadinfo(args.padinfo)
    # Query parameters shared by the API calls made for every pad.
    data = {'apikey': info['apikey']}

    padids = get_padids(args, info, data)

    # The progress bar is disabled whenever istty() reports false.
    progress_kwargs = {} if istty() else {"disable": True}
    progress_pads = tqdm(iterable=padids, total=len(padids), **progress_kwargs)

    for index, padid in enumerate(progress_pads):
        handle_pad(args, index, padid, data, info, raw_ext)