You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
57 lines
1.2 KiB
57 lines
1.2 KiB
import unicodedata
|
|
import sys
|
|
import re
|
|
|
|
from setuptools.extern import six
|
|
|
|
|
|
# HFS Plus uses decomposed UTF-8
|
|
def decompose(path):
    """Return *path* normalized to Unicode NFD (decomposed) form.

    Text input is normalized directly.  Any other input is treated as
    UTF-8 encoded bytes: it is decoded, normalized and re-encoded.  If a
    ``UnicodeError`` is raised along the way (e.g. the bytes are not
    valid UTF-8), the value is returned as it stood at that point.
    """
    to_nfd = unicodedata.normalize

    if not isinstance(path, six.text_type):
        try:
            # Rebind step by step so a failure leaves ``path`` at the
            # last successfully computed value.
            path = path.decode('utf-8')
            path = to_nfd('NFD', path)
            path = path.encode('utf-8')
        except UnicodeError:
            pass  # Not UTF-8
        return path

    return to_nfd('NFD', path)
|
|
|
|
|
|
def filesys_decode(path):
    """
    Return *path* as text.

    Text is handed back unchanged.  Otherwise the value is decoded with
    the filesystem encoding (or UTF-8 when none is reported) and then
    with UTF-8, in that order; the first successful decoding wins.
    None is returned when neither encoding can decode the value.
    """
    if isinstance(path, six.text_type):
        return path

    preferred = sys.getfilesystemencoding() or 'utf-8'

    for codec in (preferred, 'utf-8'):
        try:
            decoded = path.decode(codec)
        except UnicodeDecodeError:
            continue
        else:
            return decoded

    return None
|
|
|
|
|
|
def try_encode(string, enc):
    """Encode *string* using *enc*; return None instead of raising
    when the text cannot be represented in that encoding."""
    try:
        encoded = string.encode(enc)
    except UnicodeEncodeError:
        encoded = None
    return encoded
|
|
|
|
|
|
# Matches a PEP 263 encoding declaration ("coding cookie") inside a
# comment and captures the encoding name.
CODING_RE = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')

# Matches a line that is empty, blank, or holds nothing but a comment.
# Only after such a first line may the cookie appear on the second line.
_BLANK_OR_COMMENT_RE = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)')


def detect_encoding(fp):
    """Return the source encoding declared in *fp*, or None.

    *fp* must be a seekable file object opened in binary mode.  Following
    PEP 263, the declaration is looked for on the first line and, when
    the first line is blank or comment-only (for example a ``#!``
    shebang), on the second line as well.  The file position is reset to
    the start before returning.

    :param fp: binary file object supporting ``readline()`` and ``seek()``.
    :return: the declared encoding name as text, or None when no
        declaration is found.
    """
    first_line = fp.readline()
    m = CODING_RE.match(first_line)
    if m is None and _BLANK_OR_COMMENT_RE.match(first_line):
        # The cookie may legally sit on line two, e.g. below a shebang.
        m = CODING_RE.match(fp.readline())
    # Rewind so the caller can read the file from the beginning.
    fp.seek(0)
    if m is None:
        return None
    return m.group(1).decode('ascii')
|
|
|