# Add optional "groupinfo" support to etherdump.
#
# - PadServer.getPadLastEdited: return None instead of propagating a TypeError
#   raised while converting the API's lastEdited value to a datetime.
# - PadServer.getPadURL: new optional groupinfo mapping. Group pads now link to
#   /group.html/<groupID>/pad.html/<padID> instead of /p/<padID>; groupID is
#   read from groupinfo[<group name without a leading "g.">]["id"] and falls
#   back to 0 when no usable entry exists.
# - dumpPads: new groupinfo keyword, forwarded to getPadURL. last_edited
#   becomes '' when getPadLastEdited returns None, and the "up to date" skip
#   now also requires a non-empty stored "last_edited".
# - "dump" command: load JSON from the file named by args.groupinfo (when set)
#   and pass it to dumpPads.
#
# NOTE(review): no hunk below defines args.groupinfo; confirm the command-line
#   option is declared elsewhere in the script.
# NOTE(review): indentation depth and the position of blank context lines were
#   reconstructed from Python syntax and the hunk line counts (in particular
#   whether the "Using groupinfo" message sits inside `if args.groupinfo:`).
#   Verify against the repository, or apply with whitespace-insensitive
#   matching and check the result.
diff --git a/etherdump b/etherdump
index a50d86a..2e3da39 100755
--- a/etherdump
+++ b/etherdump
@@ -68,12 +68,26 @@ class PadServer (object):
     def getPadLastEdited (self, padID):
         data = {'apikey': self.apikey, 'padID': padID.encode("utf-8")}
         raw = json.load(urlopen(self.apiurl+'getLastEdited?'+urlencode(data)))['data']['lastEdited']
-        return datetime.fromtimestamp(int(raw)/1000)
+        try:
+            return datetime.fromtimestamp(int(raw)/1000)
+        except TypeError as e:
+            return None
 
-    def getPadURL (self, padID):
+    def getPadURL (self, padID, groupinfo=None):
         group, name = pad_split_group(padID)
         if group:
-            return self.protocol+"://"+self.hostname+"/p/"+padID
+            gid = group
+            if gid.startswith("g."):
+                gid = gid[2:]
+            if groupinfo:
+                ginfo = groupinfo.get(gid)
+                if ginfo:
+                    groupID = ginfo.get("id", 0)
+                else:
+                    groupID = 0
+            else:
+                groupID = 0
+            return self.protocol+"://"+self.hostname+"/group.html/"+str(groupID)+"/pad.html/"+padID
         else:
             return self.protocol+"://"+self.hostname+"/public_pad/"+padID
 
@@ -89,7 +103,7 @@ def get_template_env (tpath=None):
     # template = env.get_template('pad.html')
     # print template.render(the='variables', go='here').encode("utf-8")
 
-def dumpPads (padserver, padids, outputpath, pub_path, group_path, sleeptime=0.01, force=False, templates=None):
+def dumpPads (padserver, padids, outputpath, pub_path, group_path, sleeptime=0.01, force=False, templates=None, groupinfo=None):
     template_env = get_template_env(templates)
     pad_template = template_env.get_template("pad.html")
     numpads = len(padids)
@@ -123,12 +137,16 @@ def dumpPads (padserver, padids, outputpath, pub_path, group_path, sleeptime=0.0
         htmlpath = fp+".html"
         metapath = fp+".json"
 
-        last_edited = padserver.getPadLastEdited(padid).isoformat()
+        last_edited = padserver.getPadLastEdited(padid)
+        if last_edited:
+            last_edited = last_edited.isoformat()
+        else:
+            last_edited = ''
 
         if os.path.exists(metapath):
             with open(metapath) as f:
                 meta = json.load(f)
-            if not force and meta.get("last_edited") == last_edited:
+            if not force and meta.get("last_edited") and meta.get("last_edited") == last_edited:
                 if verbose:
                     print("Up to date, skipping", file=sys.stderr)
                 continue
@@ -162,7 +180,7 @@ def dumpPads (padserver, padids, outputpath, pub_path, group_path, sleeptime=0.0
             t = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
             body = t.find(".//body")
             title = padid
-            editurl = padserver.getPadURL(padid)
+            editurl = padserver.getPadURL(padid, groupinfo)
             meta['url'] = editurl
             json_dump = json.dumps(meta)
             f.write(pad_template.render(
@@ -333,6 +351,14 @@ if __name__ == "__main__":
             print(gid)
 
     elif cmd == "dump":
+        groupinfo = None
+        if args.groupinfo:
+            with open(args.groupinfo) as gif:
+                groupinfo = json.load(gif)
+
+        if verbose:
+            print ("Using groupinfo", file=sys.stderr)
+
         start = time.time()
         padids = padserver.listAllPads()
         if args.skip:
@@ -347,7 +373,8 @@ if __name__ == "__main__":
             args.pubpath,
             args.grouppath,
             force=args.force,
-            templates=args.templates)
+            templates=args.templates,
+            groupinfo=groupinfo)
 
         if verbose:
             print ("Completed in {0:0.0f} seconds".format(time.time()-start), file=sys.stderr)