#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import sys , json , re , os , urlparse
from datetime import datetime
from urllib import urlencode
from urllib2 import HTTPError
from jinja2 import FileSystemLoader , Environment
from common import *
from time import sleep
import dateutil . parser
"""
rss :
Generate an RSS feed from an etherdump .
TODO NEXT
add back limit and ordering parameters to create filters to make a latest changes feed !
"""
def group(items, key=lambda x: x):
    """Partition *items* into sorted lists, one list per distinct key value.

    ``key`` is called on every item; items that yield the same value end up
    in the same list.  The returned outer list is ordered by key value and
    every inner list is itself sorted.
    """
    buckets = {}
    for entry in items:
        buckets.setdefault(key(entry), []).append(entry)
    return [sorted(buckets[name]) for name in sorted(buckets)]
def base(x):
    """Return *x* with a trailing etherdump version suffix removed.

    The recognised suffixes are ``.raw.html``, ``.diff.html``, ``.meta.json``
    and ``.raw.txt``.  Names without one of these suffixes are returned
    unchanged.
    """
    # The alternatives are grouped so that the ``$`` anchor applies to every
    # suffix; without the group only ``.raw.txt`` is anchored and the other
    # three would be stripped from the middle of a name as well.
    return re.sub(r"(\.raw\.html|\.diff\.html|\.meta\.json|\.raw\.txt)$", "", x)
def excerpt(t, chars=25):
    """Return *t* shortened to at most *chars* characters plus an ellipsis.

    Values whose length does not exceed *chars* are returned unchanged;
    longer values are cut to their first *chars* characters and ``...`` is
    appended.  Registered as the ``excerpt`` template filter in ``main``.
    """
    if len(t) <= chars:
        return t
    return t[:chars] + "..."
def absurl(url, base=None):
    """Return *url* made absolute by prefixing *base* when it is relative.

    A *url* that already starts with ``http://`` or ``https://`` is returned
    as is.  Otherwise *base* is prepended by plain string concatenation (the
    caller is expected to supply a base ending in ``/``).  When no *base* is
    given the *url* is returned unchanged instead of raising ``TypeError`` on
    ``None + url``.
    """
    # Match the full scheme prefix so that relative names which merely begin
    # with the letters "http" (e.g. "httpd-notes.html") still get the base.
    if url.startswith(("http://", "https://")):
        return url
    if not base:
        return url
    return base + url
def url_base(url):
    """Return the "directory" part of *url*, with a trailing slash.

    The scheme and network location are kept, the last path segment is
    dropped, and params, query and fragment are discarded.  For example
    ``http://example.com/a/feed.xml`` gives ``http://example.com/a/``.  A
    bare file name such as ``feed.xml`` has no directory part and gives an
    empty string.
    """
    parts = urlparse.urlparse(url)
    # Strip the leading slash before splitting so that a root-level file
    # yields an empty directory; urlunparse re-inserts the slash between the
    # network location and a non-empty path.
    directory = os.path.split(parts.path.lstrip("/"))[0]
    # Empty strings (rather than None) are passed for the unused components,
    # keeping every component the same type.
    ret = urlparse.urlunparse((parts.scheme, parts.netloc, directory, "", "", ""))
    if ret:
        ret += "/"
    return ret
def main(args):
    """Render an RSS feed from etherdump files and print it to stdout.

    *args* is the list of command-line argument strings handed to
    ``ArgumentParser.parse_args``.  The positional ``input`` file names are
    grouped per pad (see ``base``), each pad's ``.meta.json`` is loaded, the
    pads are ordered by ``lastedited_iso`` and cut to ``--limit``, and the
    ``rss.xml`` template is rendered with the parsed arguments (plus the
    computed ``pads``, ``siteurl`` and ``utcnow`` values) as its variables.

    This function relies on Python 2 names (``unicode``, ``str.decode``).

    NOTE(review): ``--skip``, ``--zerorevs`` and ``--type`` are parsed but not
    read anywhere in this function; they are only visible to the template
    through ``vars(args)`` -- confirm whether the template uses them.
    """
    # NOTE(review): the first positional argument of ArgumentParser is
    # ``prog``, so this text is used as the program name in usage output;
    # kept as in the original.
    parser = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)")
    parser.add_argument("input", nargs="+", help="filenames")
    parser.add_argument("--templates", default=None, help="templates path")
    parser.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
    parser.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
    parser.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
    parser.add_argument("--type", default="recentchanges", help="type of feed, default: recentchanges")
    parser.add_argument("--limit", type=int, default=10, help="number of items, default: 10")
    parser.add_argument("--chronological", default=False, action="store_true", help="order chronologically, default: False (reverse chrono)")
    parser.add_argument("--title", default="etherpad", help="rss feed channel title, default: etherpad")
    parser.add_argument("--description", default="", help="channel description, default: empty")
    parser.add_argument("--language", default="en-US", help="feed language, default: en-US")
    parser.add_argument("--updatePeriod", default="daily", help="updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
    parser.add_argument("--updateFrequency", default=1, type=int, help="update frequency within the update period (where 2 would mean twice per period); default: 1")
    parser.add_argument("--siteurl", default=None, help="to use as channel's site link, default: the etherpad url")
    parser.add_argument("--feedurl", default="feed.xml", help="to use as feeds own (self) link, default: feed.xml")
    parser.add_argument("--generator", default="https://gitlab.com/activearchives/etherdump", help="generator, default: https://gitlab.com/activearchives/etherdump")
    parser.add_argument("--content", default=False, action="store_true", help="include content, default: False")
    parser.add_argument("--link", default="diffhtml,html,text", help="version to use as link, can be comma-delim list, use first avail, default: diffhtml,html,text")
    parser.add_argument("--linkbase", default=None, help="base url to use for links, default: try to use the feedurl")
    args = parser.parse_args(args)

    # Date layout used for both the per-pad and the channel timestamps.  The
    # offset is hard-coded, so the values are presented as UTC.
    # NOTE(review): assumes "lastedited_iso" is a UTC timestamp -- confirm.
    rfc822_format = "%a, %d %b %Y %H:%M:%S +0000"

    tmpath = args.templates
    if tmpath is None:
        # Default: <parent of this file's directory>/data/templates
        tmpath = os.path.split(os.path.abspath(__file__))[0]
        tmpath = os.path.split(tmpath)[0]
        tmpath = os.path.join(tmpath, "data", "templates")

    env = Environment(loader=FileSystemLoader(tmpath))
    env.filters["excerpt"] = excerpt
    template = env.get_template("rss.xml")

    info = loadpadinfo(args.padinfo)

    # One group of file names per pad (all versions share the same base name).
    inputs = args.input
    inputs.sort()
    inputs = group(inputs, base)

    def loadmeta(paths):
        """Return the parsed .meta.json from *paths*, or None if there is none."""
        for path in paths:
            if path.endswith(".meta.json"):
                with open(path) as f:
                    return json.load(f)
        return None

    def fixdates(padmeta):
        """Add parsed and RFC 822 formatted last-edit dates to *padmeta*."""
        d = dateutil.parser.parse(padmeta["lastedited_iso"])
        padmeta["lastedited"] = d
        padmeta["lastedited_822"] = d.strftime(rfc822_format)
        return padmeta

    # Groups that contain no .meta.json are skipped; passing their None
    # result on to fixdates would raise a TypeError.
    loaded = (loadmeta(paths) for paths in inputs)
    args.pads = [fixdates(meta) for meta in loaded if meta is not None]

    padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
    if isinstance(padurlbase, unicode):
        padurlbase = padurlbase.encode("utf-8")
    args.siteurl = args.siteurl or padurlbase
    args.utcnow = datetime.utcnow().strftime(rfc822_format)

    # Order items & apply limit
    args.pads.sort(key=lambda x: x.get("lastedited_iso"), reverse=not args.chronological)
    if args.limit:
        args.pads = args.pads[:args.limit]

    # Add versions_by_type, the full text and the link (based on args.link).
    # Only the pads that survive the limit are processed: they are the only
    # ones handed to the template, so no other text files need to be read.
    linkversions = [name.strip() for name in args.link.split(",")]
    linkbase = args.linkbase or url_base(args.feedurl)
    for pad in args.pads:
        versions_by_type = {}
        pad["versions_by_type"] = versions_by_type
        for version in pad["versions"]:
            versions_by_type[version["type"]] = version

        with open(versions_by_type["text"]["path"]) as f:
            pad["text"] = f.read().decode("utf-8")

        # Use the first requested link type that is actually available.
        for vtype in linkversions:
            vdata = versions_by_type.get(vtype)
            if vdata is None:
                # This pad has no such version; try the next candidate.
                continue
            try:
                if vtype == "pad" or os.path.exists(vdata["path"]):
                    pad["link"] = absurl(vdata["url"], linkbase)
                    break
            except KeyError:
                # Version entry lacks "path" or "url"; try the next candidate.
                pass

    print(template.render(vars(args)).encode("utf-8"))