forked from varia/varia.website
400 lines
13 KiB
Python
400 lines
13 KiB
Python
|
# -*- coding: utf-8 -*-
|
|||
|
"""This module parses and generates contentlines as defined in RFC 2445
|
|||
|
(iCalendar), but will probably work for other MIME types with similar syntax.
|
|||
|
Eg. RFC 2426 (vCard)
|
|||
|
|
|||
|
It is stupid in the sense that it treats the content purely as strings. No type
|
|||
|
conversion is attempted.
|
|||
|
"""
|
|||
|
from __future__ import unicode_literals
|
|||
|
|
|||
|
from icalendar import compat
|
|||
|
from icalendar.caselessdict import CaselessDict
|
|||
|
from icalendar.parser_tools import DEFAULT_ENCODING
|
|||
|
from icalendar.parser_tools import SEQUENCE_TYPES
|
|||
|
from icalendar.parser_tools import to_unicode
|
|||
|
|
|||
|
import re
|
|||
|
|
|||
|
|
|||
|
def escape_char(text):
|
|||
|
"""Format value according to iCalendar TEXT escaping rules.
|
|||
|
"""
|
|||
|
assert isinstance(text, (compat.unicode_type, compat.bytes_type))
|
|||
|
# NOTE: ORDER MATTERS!
|
|||
|
return text.replace(r'\N', '\n')\
|
|||
|
.replace('\\', '\\\\')\
|
|||
|
.replace(';', r'\;')\
|
|||
|
.replace(',', r'\,')\
|
|||
|
.replace('\r\n', r'\n')\
|
|||
|
.replace('\n', r'\n')
|
|||
|
|
|||
|
|
|||
|
def unescape_char(text):
|
|||
|
assert isinstance(text, (compat.unicode_type, compat.bytes_type))
|
|||
|
# NOTE: ORDER MATTERS!
|
|||
|
if isinstance(text, compat.unicode_type):
|
|||
|
return text.replace('\\N', '\\n')\
|
|||
|
.replace('\r\n', '\n')\
|
|||
|
.replace('\\n', '\n')\
|
|||
|
.replace('\\,', ',')\
|
|||
|
.replace('\\;', ';')\
|
|||
|
.replace('\\\\', '\\')
|
|||
|
elif isinstance(text, compat.bytes_type):
|
|||
|
return text.replace(b'\\N', b'\\n')\
|
|||
|
.replace(b'\r\n', b'\n')\
|
|||
|
.replace(b'\n', b'\n')\
|
|||
|
.replace(b'\\,', b',')\
|
|||
|
.replace(b'\\;', b';')\
|
|||
|
.replace(b'\\\\', b'\\')
|
|||
|
|
|||
|
|
|||
|
def tzid_from_dt(dt):
|
|||
|
tzid = None
|
|||
|
if hasattr(dt.tzinfo, 'zone'):
|
|||
|
tzid = dt.tzinfo.zone # pytz implementation
|
|||
|
elif hasattr(dt.tzinfo, 'tzname'):
|
|||
|
try:
|
|||
|
tzid = dt.tzinfo.tzname(dt) # dateutil implementation
|
|||
|
except AttributeError:
|
|||
|
# No tzid available
|
|||
|
pass
|
|||
|
return tzid
|
|||
|
|
|||
|
|
|||
|
def foldline(line, limit=75, fold_sep='\r\n '):
|
|||
|
"""Make a string folded as defined in RFC5545
|
|||
|
Lines of text SHOULD NOT be longer than 75 octets, excluding the line
|
|||
|
break. Long content lines SHOULD be split into a multiple line
|
|||
|
representations using a line "folding" technique. That is, a long
|
|||
|
line can be split between any two characters by inserting a CRLF
|
|||
|
immediately followed by a single linear white-space character (i.e.,
|
|||
|
SPACE or HTAB).
|
|||
|
"""
|
|||
|
assert isinstance(line, compat.unicode_type)
|
|||
|
assert '\n' not in line
|
|||
|
|
|||
|
# Use a fast and simple variant for the common case that line is all ASCII.
|
|||
|
try:
|
|||
|
line.encode('ascii')
|
|||
|
except (UnicodeEncodeError, UnicodeDecodeError):
|
|||
|
pass
|
|||
|
else:
|
|||
|
return fold_sep.join(
|
|||
|
line[i:i + limit - 1] for i in range(0, len(line), limit - 1)
|
|||
|
)
|
|||
|
|
|||
|
ret_chars = []
|
|||
|
byte_count = 0
|
|||
|
for char in line:
|
|||
|
char_byte_len = len(char.encode(DEFAULT_ENCODING))
|
|||
|
byte_count += char_byte_len
|
|||
|
if byte_count >= limit:
|
|||
|
ret_chars.append(fold_sep)
|
|||
|
byte_count = char_byte_len
|
|||
|
ret_chars.append(char)
|
|||
|
|
|||
|
return ''.join(ret_chars)
|
|||
|
|
|||
|
|
|||
|
#################################################################
|
|||
|
# Property parameter stuff
|
|||
|
|
|||
|
def param_value(value):
|
|||
|
"""Returns a parameter value.
|
|||
|
"""
|
|||
|
if isinstance(value, SEQUENCE_TYPES):
|
|||
|
return q_join(value)
|
|||
|
return dquote(value)
|
|||
|
|
|||
|
|
|||
|
# Could be improved
|
|||
|
|
|||
|
# [\w-] because of the iCalendar RFC
|
|||
|
# . because of the vCard RFC
|
|||
|
NAME = re.compile(r'[\w.-]+')
|
|||
|
|
|||
|
UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7F",:;]')
|
|||
|
QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7F"]')
|
|||
|
FOLD = re.compile(b'(\r?\n)+[ \t]')
|
|||
|
uFOLD = re.compile('(\r?\n)+[ \t]')
|
|||
|
NEWLINE = re.compile(r'\r?\n')
|
|||
|
|
|||
|
|
|||
|
def validate_token(name):
|
|||
|
match = NAME.findall(name)
|
|||
|
if len(match) == 1 and name == match[0]:
|
|||
|
return
|
|||
|
raise ValueError(name)
|
|||
|
|
|||
|
|
|||
|
def validate_param_value(value, quoted=True):
|
|||
|
validator = QUNSAFE_CHAR if quoted else UNSAFE_CHAR
|
|||
|
if validator.findall(value):
|
|||
|
raise ValueError(value)
|
|||
|
|
|||
|
|
|||
|
# chars presence of which in parameter value will be cause the value
|
|||
|
# to be enclosed in double-quotes
|
|||
|
QUOTABLE = re.compile("[,;: ’']")
|
|||
|
|
|||
|
|
|||
|
def dquote(val):
|
|||
|
"""Enclose parameter values containing [,;:] in double quotes.
|
|||
|
"""
|
|||
|
# a double-quote character is forbidden to appear in a parameter value
|
|||
|
# so replace it with a single-quote character
|
|||
|
val = val.replace('"', "'")
|
|||
|
if QUOTABLE.search(val):
|
|||
|
return '"%s"' % val
|
|||
|
return val
|
|||
|
|
|||
|
|
|||
|
# parsing helper
|
|||
|
def q_split(st, sep=',', maxsplit=-1):
|
|||
|
"""Splits a string on char, taking double (q)uotes into considderation.
|
|||
|
"""
|
|||
|
if maxsplit == 0:
|
|||
|
return [st]
|
|||
|
|
|||
|
result = []
|
|||
|
cursor = 0
|
|||
|
length = len(st)
|
|||
|
inquote = 0
|
|||
|
splits = 0
|
|||
|
for i in range(length):
|
|||
|
ch = st[i]
|
|||
|
if ch == '"':
|
|||
|
inquote = not inquote
|
|||
|
if not inquote and ch == sep:
|
|||
|
result.append(st[cursor:i])
|
|||
|
cursor = i + 1
|
|||
|
splits += 1
|
|||
|
if i + 1 == length or splits == maxsplit:
|
|||
|
result.append(st[cursor:])
|
|||
|
break
|
|||
|
return result
|
|||
|
|
|||
|
|
|||
|
def q_join(lst, sep=','):
|
|||
|
"""Joins a list on sep, quoting strings with QUOTABLE chars.
|
|||
|
"""
|
|||
|
return sep.join(dquote(itm) for itm in lst)
|
|||
|
|
|||
|
|
|||
|
class Parameters(CaselessDict):
|
|||
|
"""Parser and generator of Property parameter strings. It knows nothing of
|
|||
|
datatypes. Its main concern is textual structure.
|
|||
|
"""
|
|||
|
|
|||
|
def params(self):
|
|||
|
"""In rfc2445 keys are called parameters, so this is to be consitent
|
|||
|
with the naming conventions.
|
|||
|
"""
|
|||
|
return self.keys()
|
|||
|
|
|||
|
# TODO?
|
|||
|
# Later, when I get more time... need to finish this off now. The last major
|
|||
|
# thing missing.
|
|||
|
# def _encode(self, name, value, cond=1):
|
|||
|
# # internal, for conditional convertion of values.
|
|||
|
# if cond:
|
|||
|
# klass = types_factory.for_property(name)
|
|||
|
# return klass(value)
|
|||
|
# return value
|
|||
|
#
|
|||
|
# def add(self, name, value, encode=0):
|
|||
|
# "Add a parameter value and optionally encode it."
|
|||
|
# if encode:
|
|||
|
# value = self._encode(name, value, encode)
|
|||
|
# self[name] = value
|
|||
|
#
|
|||
|
# def decoded(self, name):
|
|||
|
# "returns a decoded value, or list of same"
|
|||
|
|
|||
|
def to_ical(self, sorted=True):
|
|||
|
result = []
|
|||
|
items = list(self.items())
|
|||
|
if sorted:
|
|||
|
items.sort()
|
|||
|
|
|||
|
for key, value in items:
|
|||
|
value = param_value(value)
|
|||
|
if isinstance(value, compat.unicode_type):
|
|||
|
value = value.encode(DEFAULT_ENCODING)
|
|||
|
# CaselessDict keys are always unicode
|
|||
|
key = key.upper().encode(DEFAULT_ENCODING)
|
|||
|
result.append(key + b'=' + value)
|
|||
|
return b';'.join(result)
|
|||
|
|
|||
|
@classmethod
|
|||
|
def from_ical(cls, st, strict=False):
|
|||
|
"""Parses the parameter format from ical text format."""
|
|||
|
|
|||
|
# parse into strings
|
|||
|
result = cls()
|
|||
|
for param in q_split(st, ';'):
|
|||
|
try:
|
|||
|
key, val = q_split(param, '=', maxsplit=1)
|
|||
|
validate_token(key)
|
|||
|
# Property parameter values that are not in quoted
|
|||
|
# strings are case insensitive.
|
|||
|
vals = []
|
|||
|
for v in q_split(val, ','):
|
|||
|
if v.startswith('"') and v.endswith('"'):
|
|||
|
v = v.strip('"')
|
|||
|
validate_param_value(v, quoted=True)
|
|||
|
vals.append(v)
|
|||
|
else:
|
|||
|
validate_param_value(v, quoted=False)
|
|||
|
if strict:
|
|||
|
vals.append(v.upper())
|
|||
|
else:
|
|||
|
vals.append(v)
|
|||
|
if not vals:
|
|||
|
result[key] = val
|
|||
|
else:
|
|||
|
if len(vals) == 1:
|
|||
|
result[key] = vals[0]
|
|||
|
else:
|
|||
|
result[key] = vals
|
|||
|
except ValueError as exc:
|
|||
|
raise ValueError('%r is not a valid parameter string: %s'
|
|||
|
% (param, exc))
|
|||
|
return result
|
|||
|
|
|||
|
|
|||
|
def escape_string(val):
|
|||
|
# '%{:02X}'.format(i)
|
|||
|
return val.replace(r'\,', '%2C').replace(r'\:', '%3A')\
|
|||
|
.replace(r'\;', '%3B').replace(r'\\', '%5C')
|
|||
|
|
|||
|
|
|||
|
def unescape_string(val):
|
|||
|
return val.replace('%2C', ',').replace('%3A', ':')\
|
|||
|
.replace('%3B', ';').replace('%5C', '\\')
|
|||
|
|
|||
|
|
|||
|
def unescape_list_or_string(val):
|
|||
|
if isinstance(val, list):
|
|||
|
return [unescape_string(s) for s in val]
|
|||
|
else:
|
|||
|
return unescape_string(val)
|
|||
|
|
|||
|
|
|||
|
#########################################
|
|||
|
# parsing and generation of content lines
|
|||
|
|
|||
|
class Contentline(compat.unicode_type):
|
|||
|
"""A content line is basically a string that can be folded and parsed into
|
|||
|
parts.
|
|||
|
"""
|
|||
|
def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
|
|||
|
value = to_unicode(value, encoding=encoding)
|
|||
|
assert '\n' not in value, ('Content line can not contain unescaped '
|
|||
|
'new line characters.')
|
|||
|
self = super(Contentline, cls).__new__(cls, value)
|
|||
|
self.strict = strict
|
|||
|
return self
|
|||
|
|
|||
|
@classmethod
|
|||
|
def from_parts(cls, name, params, values, sorted=True):
|
|||
|
"""Turn a parts into a content line.
|
|||
|
"""
|
|||
|
assert isinstance(params, Parameters)
|
|||
|
if hasattr(values, 'to_ical'):
|
|||
|
values = values.to_ical()
|
|||
|
else:
|
|||
|
values = vText(values).to_ical()
|
|||
|
# elif isinstance(values, basestring):
|
|||
|
# values = escape_char(values)
|
|||
|
|
|||
|
# TODO: after unicode only, remove this
|
|||
|
# Convert back to unicode, after to_ical encoded it.
|
|||
|
name = to_unicode(name)
|
|||
|
values = to_unicode(values)
|
|||
|
if params:
|
|||
|
params = to_unicode(params.to_ical(sorted=sorted))
|
|||
|
return cls('%s;%s:%s' % (name, params, values))
|
|||
|
return cls('%s:%s' % (name, values))
|
|||
|
|
|||
|
def parts(self):
|
|||
|
"""Split the content line up into (name, parameters, values) parts.
|
|||
|
"""
|
|||
|
try:
|
|||
|
st = escape_string(self)
|
|||
|
name_split = None
|
|||
|
value_split = None
|
|||
|
in_quotes = False
|
|||
|
for i, ch in enumerate(st):
|
|||
|
if not in_quotes:
|
|||
|
if ch in ':;' and not name_split:
|
|||
|
name_split = i
|
|||
|
if ch == ':' and not value_split:
|
|||
|
value_split = i
|
|||
|
if ch == '"':
|
|||
|
in_quotes = not in_quotes
|
|||
|
name = unescape_string(st[:name_split])
|
|||
|
if not name:
|
|||
|
raise ValueError('Key name is required')
|
|||
|
validate_token(name)
|
|||
|
if not name_split or name_split + 1 == value_split:
|
|||
|
raise ValueError('Invalid content line')
|
|||
|
params = Parameters.from_ical(st[name_split + 1: value_split],
|
|||
|
strict=self.strict)
|
|||
|
params = Parameters(
|
|||
|
(unescape_string(key), unescape_list_or_string(value))
|
|||
|
for key, value in compat.iteritems(params)
|
|||
|
)
|
|||
|
values = unescape_string(st[value_split + 1:])
|
|||
|
return (name, params, values)
|
|||
|
except ValueError as exc:
|
|||
|
raise ValueError(
|
|||
|
"Content line could not be parsed into parts: '%s': %s"
|
|||
|
% (self, exc)
|
|||
|
)
|
|||
|
|
|||
|
@classmethod
|
|||
|
def from_ical(cls, ical, strict=False):
|
|||
|
"""Unfold the content lines in an iCalendar into long content lines.
|
|||
|
"""
|
|||
|
ical = to_unicode(ical)
|
|||
|
# a fold is carriage return followed by either a space or a tab
|
|||
|
return cls(uFOLD.sub('', ical), strict=strict)
|
|||
|
|
|||
|
def to_ical(self):
|
|||
|
"""Long content lines are folded so they are less than 75 characters
|
|||
|
wide.
|
|||
|
"""
|
|||
|
return foldline(self).encode(DEFAULT_ENCODING)
|
|||
|
|
|||
|
|
|||
|
class Contentlines(list):
|
|||
|
"""I assume that iCalendar files generally are a few kilobytes in size.
|
|||
|
Then this should be efficient. for Huge files, an iterator should probably
|
|||
|
be used instead.
|
|||
|
"""
|
|||
|
def to_ical(self):
|
|||
|
"""Simply join self.
|
|||
|
"""
|
|||
|
return b'\r\n'.join(line.to_ical() for line in self if line) + b'\r\n'
|
|||
|
|
|||
|
@classmethod
|
|||
|
def from_ical(cls, st):
|
|||
|
"""Parses a string into content lines.
|
|||
|
"""
|
|||
|
st = to_unicode(st)
|
|||
|
try:
|
|||
|
# a fold is carriage return followed by either a space or a tab
|
|||
|
unfolded = uFOLD.sub('', st)
|
|||
|
lines = cls(Contentline(line) for
|
|||
|
line in NEWLINE.split(unfolded) if line)
|
|||
|
lines.append('') # '\r\n' at the end of every content line
|
|||
|
return lines
|
|||
|
except:
|
|||
|
raise ValueError('Expected StringType with content lines')
|
|||
|
|
|||
|
|
|||
|
# XXX: what kind of hack is this? import depends to be at end
|
|||
|
from icalendar.prop import vText
|