|
|
@ -1,15 +1,16 @@ |
|
|
|
from __future__ import print_function |
|
|
|
|
|
|
|
import re, os, json, sys |
|
|
|
from math import ceil, floor |
|
|
|
from time import sleep |
|
|
|
|
|
|
|
try: |
|
|
|
# python2 |
|
|
|
from urlparse import urlparse, urlunparse |
|
|
|
from urllib2 import urlopen, URLError, HTTPError |
|
|
|
from urllib import urlencode |
|
|
|
from urllib import quote_plus, unquote_plus |
|
|
|
from htmlentitydefs import name2codepoint |
|
|
|
from urllib.parse import urlparse, urlunparse |
|
|
|
from urllib.request import urlopen |
|
|
|
from urllib.error import URLError, HTTPError |
|
|
|
from urllib.parse import urlencode |
|
|
|
from urllib.parse import quote_plus, unquote_plus |
|
|
|
from html.entities import name2codepoint |
|
|
|
|
|
|
|
input = raw_input |
|
|
|
except ImportError: |
|
|
@ -24,12 +25,12 @@ def splitpadname (padid): |
|
|
|
if m: |
|
|
|
return(m.group(1), padid[m.end():]) |
|
|
|
else: |
|
|
|
return (u"", padid) |
|
|
|
return ("", padid) |
|
|
|
|
|
|
|
def padurl (padid, ): |
|
|
|
return padid |
|
|
|
|
|
|
|
def padpath (padid, pub_path=u"", group_path=u"", normalize=False): |
|
|
|
def padpath (padid, pub_path="", group_path="", normalize=False): |
|
|
|
g, p = splitpadname(padid) |
|
|
|
# if type(g) == unicode: |
|
|
|
# g = g.encode("utf-8") |
|
|
@ -48,7 +49,7 @@ def padpath (padid, pub_path=u"", group_path=u"", normalize=False): |
|
|
|
return os.path.join(pub_path, p) |
|
|
|
|
|
|
|
def padpath2id (path): |
|
|
|
if type(path) == unicode: |
|
|
|
if type(path) == str: |
|
|
|
path = path.encode("utf-8") |
|
|
|
dd, p = os.path.split(path) |
|
|
|
gname = dd.split("/")[-1] |
|
|
@ -95,7 +96,7 @@ def progressbar (i, num, label="", file=sys.stderr): |
|
|
|
percentage = int(floor(p*100)) |
|
|
|
bars = int(ceil(p*20)) |
|
|
|
bar = ("*"*bars) + ("-"*(20-bars)) |
|
|
|
msg = u"\r{0} {1}/{2} {3}... ".format(bar, (i+1), num, label) |
|
|
|
msg = "\r{0} {1}/{2} {3}... ".format(bar, (i+1), num, label) |
|
|
|
sys.stderr.write(msg) |
|
|
|
sys.stderr.flush() |
|
|
|
|
|
|
@ -114,15 +115,15 @@ def unescape(text): |
|
|
|
# character reference |
|
|
|
try: |
|
|
|
if text[:3] == "&#x": |
|
|
|
return unichr(int(text[3:-1], 16)) |
|
|
|
return chr(int(text[3:-1], 16)) |
|
|
|
else: |
|
|
|
return unichr(int(text[2:-1])) |
|
|
|
return chr(int(text[2:-1])) |
|
|
|
except ValueError: |
|
|
|
pass |
|
|
|
else: |
|
|
|
# named entity |
|
|
|
try: |
|
|
|
text = unichr(name2codepoint[text[1:-1]]) |
|
|
|
text = chr(name2codepoint[text[1:-1]]) |
|
|
|
except KeyError: |
|
|
|
pass |
|
|
|
return text # leave as is |
|
|
|