2015-12-04 17:17:32 +01:00
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
2016-01-15 14:04:03 +01:00
import sys , json , re , os , urlparse
from datetime import datetime
2015-12-04 17:17:32 +01:00
from urllib import urlencode
2016-01-15 14:04:03 +01:00
from urllib2 import HTTPError
2015-12-04 17:17:32 +01:00
from jinja2 import FileSystemLoader , Environment
2016-01-15 14:04:03 +01:00
from common import *
from time import sleep
import dateutil . parser
2016-01-08 12:09:05 +01:00
2016-01-15 14:04:03 +01:00
"""
index:
Generate pages from etherdumps using a template.
Built-in templates: rss.xml, index.html
"""
2015-12-04 17:17:32 +01:00
def group(items, key=lambda x: x):
    """Bucket *items* by ``key(item)``.

    Returns a list of lists: one bucket per distinct key value. Buckets are
    ordered by ascending key, and the members of each bucket are sorted.
    With the default *key* every distinct item forms its own bucket.
    """
    buckets = {}
    for entry in items:
        buckets.setdefault(key(entry), []).append(entry)
    # One sorted bucket per key, emitted in key order.
    return [sorted(buckets[name]) for name in sorted(buckets)]
2016-01-15 14:04:03 +01:00
def base(x):
    """Return filename *x* with its etherdump version suffix removed.

    The recognised suffixes are ``.raw.html``, ``.diff.html``,
    ``.meta.json`` and ``.raw.txt``. A name that ends with none of them is
    returned unchanged.
    """
    # The alternatives are grouped so that "$" anchors every one of them.
    # Ungrouped ("(a)|(b)|(c)|(d)$"), only the last alternative is anchored
    # and the others would be stripped from anywhere inside the name.
    return re.sub(r"\.(raw\.html|diff\.html|meta\.json|raw\.txt)$", "", x)
def excerpt(t, chars=25):
    """Shorten *t* to its first *chars* characters for display.

    Text no longer than *chars* is returned unchanged. Longer text is cut
    after *chars* characters and a three-dot ellipsis is appended, so a
    shortened result is ``chars + 3`` characters long.

    Registered as the ``excerpt`` template filter by ``main``.
    """
    if len(t) <= chars:
        return t
    return t[:chars] + "..."
def absurl(url, base=None):
    """Return *url* made absolute by prefixing *base* when needed.

    A *url* that already starts with ``http://`` or ``https://`` is returned
    unchanged. Otherwise *base* is prepended by plain string concatenation,
    so *base* is expected to end with "/". When no *base* is given (``None``
    or empty) the url is returned as-is instead of failing on ``None + url``.
    """
    # Match the full scheme prefix: a bare "http" test would also accept
    # relative names such as "httpd-notes.html".
    if url.startswith(("http://", "https://")) or not base:
        return url
    return base + url
def url_base(url):
    """Return the "directory" part of *url*, ending in "/".

    The last path segment, params, query and fragment are dropped, e.g.
    ``http://host/a/b/feed.xml?x=1`` becomes ``http://host/a/b/``. A bare
    filename such as ``feed.xml`` has no directory part and yields ``""``.
    """
    # URL paths always use "/", so split with posixpath rather than the
    # platform-dependent os.path.
    import posixpath
    # Resolved locally so the helper works whether the module is run under
    # Python 2 (urlparse) or Python 3 (urllib.parse).
    try:
        from urllib.parse import urlparse as _urlparse, urlunparse as _urlunparse
    except ImportError:
        from urlparse import urlparse as _urlparse, urlunparse as _urlunparse

    parts = _urlparse(url)
    # Leading slashes are stripped so that a top-level path ("/feed.xml")
    # splits to an empty directory instead of "/", which would otherwise
    # produce a doubled slash once the trailing "/" is appended.
    directory = posixpath.split(parts.path.lstrip("/"))[0]
    ret = _urlunparse((parts.scheme, parts.netloc, directory, "", "", ""))
    if ret:
        ret += "/"
    return ret
def _build_parser():
    """Build the ArgumentParser describing the index command's options."""
    p = ArgumentParser("Convert dumped files to a document via a template.")
    p.add_argument("input", nargs="+", help="filenames (uses .meta.json files)")
    p.add_argument("--templatepath", default=None, help="path to find templates, default: built-in")
    p.add_argument("--template", default="index.html", help="template name, built-ins include index.html, rss.xml; default: index.html")
    p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: ./.etherdump/settings.json")
    # p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
    p.add_argument("--order", default="padid", help="order, possible values: padid, pad (no group name), lastedited, (number of) authors, revisions, default: padid")
    p.add_argument("--reverse", default=False, action="store_true", help="reverse order, default: False (reverse chrono)")
    p.add_argument("--limit", type=int, default=0, help="limit to number of items, default: 0 (no limit)")
    p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")

    p.add_argument("--content", default=False, action="store_true", help="rss: include (full) content tag, default: False")
    p.add_argument("--link", default="diffhtml,html,text", help="link variable will be to this version, can be comma-delim list, use first avail, default: diffhtml,html,text")
    p.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
    p.add_argument("--output", default=None, help="output, default: stdout")

    pg = p.add_argument_group('template variables')
    pg.add_argument("--feedurl", default="feed.xml", help="rss: to use as feeds own (self) link, default: feed.xml")
    pg.add_argument("--siteurl", default=None, help="rss: to use as channel's site link, default: the etherpad url")
    pg.add_argument("--title", default="etherdump", help="title for document or rss feed channel title, default: etherdump")
    pg.add_argument("--description", default="", help="rss: channel description, default: empty")
    pg.add_argument("--language", default="en-US", help="rss: feed language, default: en-US")
    pg.add_argument("--updatePeriod", default="daily", help="rss: updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
    pg.add_argument("--updateFrequency", default=1, type=int, help="rss: update frequency within the update period (where 2 would mean twice per period); default: 1")
    pg.add_argument("--generator", default="https://gitlab.com/activearchives/etherdump", help="generator, default: https://gitlab.com/activearchives/etherdump")
    pg.add_argument("--timestamp", default=None, help="timestamp, default: now (e.g. 2015-12-01 12:30:00)")
    pg.add_argument("--next", default=None, help="next link, default: None)")
    pg.add_argument("--prev", default=None, help="prev link, default: None")
    return p


# Sort-key functions for the values accepted by --order.
_ORDER_KEYS = {
    "lastedited": lambda pad: pad.get("lastedited_iso"),
    "pad": lambda pad: pad.get("pad"),
    "padid": lambda pad: pad.get("padid"),
    "revisions": lambda pad: pad.get("revisions"),
    # A pad without an "authors" entry counts as having zero authors.
    "authors": lambda pad: len(pad.get("authors") or ()),
}


def _load_meta(paths):
    """Return the parsed JSON of the first ``.meta.json`` file in *paths*, or None."""
    for path in paths:
        if path.endswith(".meta.json"):
            with open(path) as f:
                return json.load(f)
    return None


def _fix_dates(padmeta):
    """Add parsed and RFC 822 style forms of ``lastedited_iso`` to *padmeta*."""
    d = dateutil.parser.parse(padmeta["lastedited_iso"])
    padmeta["lastedited"] = d
    # NOTE(review): the offset is hard-coded as +0000 -- presumably
    # lastedited_iso is always UTC; confirm against the dump command.
    padmeta["lastedited_822"] = d.strftime("%a, %d %b %Y %H:%M:%S +0000")
    return padmeta


def _load_pads(inputs):
    """Load pad metadata for *inputs*, one entry per pad.

    Files are grouped by their name without the version suffix (``base``).
    Groups that contain no ``.meta.json`` file are skipped.
    """
    pads = []
    for paths in group(inputs, base):
        meta = _load_meta(paths)
        if meta is not None:
            pads.append(_fix_dates(meta))
    return pads


def _add_versions_and_link(pad, linkversions, linkbase):
    """Add ``versions_by_type``, ``text`` and (when available) ``link`` to *pad*.

    ``link`` is the url of the first type in *linkversions* that the pad has
    and whose file exists on disk (the "pad" type needs no file). Types the
    pad lacks, or entries without a path/url, are skipped.
    """
    versions_by_type = {}
    for version in pad["versions"]:
        versions_by_type[version["type"]] = version
    pad["versions_by_type"] = versions_by_type

    with open(versions_by_type["text"]["path"]) as f:
        pad["text"] = f.read().decode("utf-8")

    for vtype in linkversions:
        try:
            vdata = versions_by_type[vtype]
            available = vtype == "pad" or os.path.exists(vdata["path"])
            url = vdata["url"]
        except KeyError:
            # This version type (or its path/url) is missing: try the next.
            continue
        if available:
            pad["link"] = absurl(url, linkbase)
            break


def main(args):
    """Render dumped pads through a template (e.g. index.html, rss.xml).

    *args* is the list of command-line argument strings; see
    ``_build_parser`` for the options. The parsed options, plus the computed
    ``pads``, ``timestamp``, ``siteurl`` and ``utcnow`` values, are passed to
    the template as its variables. The result is written UTF-8 encoded to
    ``--output`` or, by default, to stdout.

    Raises Exception when ``--order`` names an unknown ordering.
    """
    args = _build_parser().parse_args(args)

    tmpath = args.templatepath
    if tmpath is None:
        # Default path for template is the built-in data/templates, located
        # next to the directory that contains this module.
        here = os.path.dirname(os.path.abspath(__file__))
        tmpath = os.path.join(os.path.dirname(here), "data", "templates")
    env = Environment(loader=FileSystemLoader(tmpath))
    env.filters["excerpt"] = excerpt
    template = env.get_template(args.template)

    info = loadpadinfo(args.padinfo)

    args.input.sort()
    args.pads = _load_pads(args.input)

    if args.timestamp is None:
        args.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Derive the pad base url from the API url: ".../api/<version>/" -> ".../p/".
    padurlbase = re.sub(r"api/\d+(?:\.\d+)*/$", "p/", info["apiurl"])
    if isinstance(padurlbase, unicode):
        padurlbase = padurlbase.encode("utf-8")
    args.siteurl = args.siteurl or padurlbase
    args.utcnow = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")

    # order items, then apply skip & limit
    try:
        order_key = _ORDER_KEYS[args.order]
    except KeyError:
        raise Exception("That ordering is not implemented!")
    args.pads.sort(key=order_key, reverse=args.reverse)
    if args.skip:
        args.pads = args.pads[args.skip:]
    if args.limit:
        args.pads = args.pads[:args.limit]

    # add versions_by_type, add in full text
    # add link (based on args.link)
    linkversions = [name.strip() for name in args.link.split(",")]
    linkbase = args.linkbase or url_base(args.feedurl)
    # Only the pads that will actually be rendered need their text loaded.
    for pad in args.pads:
        _add_versions_and_link(pad, linkversions, linkbase)

    rendered = template.render(vars(args)).encode("utf-8")
    if args.output:
        with open(args.output, "w") as f:
            print(rendered, file=f)
    else:
        print(rendered)