Pumping pads as files into publishing frameworks!
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

112 lines
3.2 KiB

from __future__ import print_function
import re, os, json, sys
from urllib import quote_plus, unquote_plus
from math import ceil, floor
from urllib2 import urlopen, HTTPError
from time import sleep
groupnamepat = re.compile(r"^g\.(\w+)\$")
def splitpadname (padid):
m = groupnamepat.match(padid)
if m:
return(m.group(1), padid[m.end():])
else:
return (u"", padid)
def padurl (padid, ):
return padid
def padpath (padid, pub_path=u"", group_path=u""):
g, p = splitpadname(padid)
if type(g) == unicode:
g = g.encode("utf-8")
if type(p) == unicode:
p = p.encode("utf-8")
p = quote_plus(p)
# p = p.replace(" ", "_")
# p = p.replace("*", "-")
if g:
return os.path.join(group_path, g, p)
else:
return os.path.join(pub_path, p)
def padpath2id (path):
if type(path) == unicode:
path = path.encode("utf-8")
dd, p = os.path.split(path)
gname = dd.split("/")[-1]
p = unquote_plus(p)
if gname:
return "{0}${1}".format(gname, p).decode("utf-8")
else:
return p.decode("utf-8")
def getjson (url, max_retry=3, retry_sleep_time=0.5):
ret = {}
ret["_retries"] = 0
while ret["_retries"] <= max_retry:
try:
f = urlopen(url)
data = f.read()
rurl = f.geturl()
f.close()
ret.update(json.loads(data))
ret["_code"] = f.getcode()
if rurl != url:
ret["_url"] = rurl
return ret
except ValueError as e:
url = "http://localhost" + url
except HTTPError as e:
print ("HTTPError {0}".format(e), file=sys.stderr)
ret["_code"] = e.code
ret["_retries"]+=1
if retry_sleep_time:
sleep(retry_sleep_time)
return ret
def loadpadinfo(p):
with open(p) as f:
info = json.load(f)
return info
def progressbar (i, num, label="", file=sys.stderr):
p = float(i) / num
percentage = int(floor(p*100))
bars = int(ceil(p*20))
bar = ("*"*bars) + ("-"*(20-bars))
msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), num, label)
sys.stderr.write(msg.encode("utf-8"))
sys.stderr.flush()
# Python developer Fredrik Lundh (author of elementtree, among other things) has such a function on his website, which works with decimal, hex and named entities:
import re, htmlentitydefs
##
# Removes HTML or XML character references and entities from a text string.
#
# @param text The HTML (or XML) source text.
# @return The plain text, as a Unicode string, if necessary.
def unescape(text):
def fixup(m):
text = m.group(0)
if text[:2] == "&#":
# character reference
try:
if text[:3] == "&#x":
return unichr(int(text[3:-1], 16))
else:
return unichr(int(text[2:-1]))
except ValueError:
pass
else:
# named entity
try:
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
except KeyError:
pass
return text # leave as is
return re.sub("&#?\w+;", fixup, text)