#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import sys , json , re , os
from datetime import datetime
from urllib import urlencode
from urllib2 import HTTPError
from jinja2 import FileSystemLoader , Environment
from common import *
from time import sleep
import dateutil . parser
"""
rss :
Generate an RSS feed from an etherdump .
TODO NEXT
add back limit and ordering parameters to create filters to make a latest changes feed !
"""
def group(items, key=lambda x: x):
    """Partition *items* into sorted groups that share the same key.

    Args:
        items: iterable of values to group.
        key: callable mapping an item to its group key. Keys must be
            hashable and mutually orderable. Defaults to the identity.

    Returns:
        A list of lists. The groups appear in ascending key order and the
        items inside each group are sorted ascending. An empty input
        yields an empty list.
    """
    buckets = {}
    for item in items:
        # setdefault creates the bucket on first sight of a key, avoiding
        # a separate membership test followed by an insert.
        buckets.setdefault(key(item), []).append(item)
    return [sorted(buckets[k]) for k in sorted(buckets)]
def base(x):
    """Return filename *x* with a trailing etherdump extension removed.

    The recognised extensions are ``.raw.html``, ``.diff.html``,
    ``.meta.json`` and ``.raw.txt``. Names that end in none of them are
    returned unchanged.

    The alternatives share a single group so that the ``$`` anchor applies
    to every one of them. With ``(a)|(b)|(c)|(d)$`` only the last
    alternative is anchored, and the other extensions would be stripped
    from the middle of a name as well.
    """
    return re.sub(r"(\.raw\.html|\.diff\.html|\.meta\.json|\.raw\.txt)$", "", x)
# Date layout used for both the per-pad and the channel timestamps. The
# "+0000" offset is a literal part of the format, not derived from the value.
_RFC822_FORMAT = "%a, %d %b %Y %H:%M:%S +0000"


def _build_parser():
    """Return the ArgumentParser holding the options of the rss command."""
    # The text is passed as description=; given positionally it would be
    # taken as the program name (prog) and end up in the usage line.
    # NOTE(review): the wording looks copied from another command -- confirm.
    p = ArgumentParser(
        description="Check for pads that have changed since last sync (according to .meta.json)")
    p.add_argument("input", nargs="+", help="filenames")
    p.add_argument("--templates", default=None, help="templates path")
    p.add_argument("--padinfo", default=".etherdump/settings.json",
                   help="settings, default: .etherdump/settings.json")
    p.add_argument("--zerorevs", default=False, action="store_true",
                   help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
    p.add_argument("--skip", default=None, type=int,
                   help="skip this many items, default: None")
    p.add_argument("--type", default="lastchanges",
                   help="type of feed, default: lastchanges")
    p.add_argument("--title", default="etherpad",
                   help="rss feed channel title, default: etherpad")
    p.add_argument("--description", default="",
                   help="channel description, default: empty")
    p.add_argument("--language", default="en-US",
                   help="feed language, default: en-US")
    p.add_argument("--updatePeriod", default="daily",
                   help="updatePeriod, possible values: hourly, daily, weekly, monthly, yearly; default: daily")
    p.add_argument("--updateFrequency", default=1, type=int,
                   help="update frequency within the update period (where 2 would mean twice per period); default: 1")
    p.add_argument("--padurl", default=None,
                   help="to use as channel link, default: padurl")
    p.add_argument("--feedurl", default="feed.xml",
                   help="to use as feeds own (self) link, default: feed.xml")
    p.add_argument("--generator", default="https://gitlab.com/activearchives/etherdump",
                   help="generator, default: https://gitlab.com/activearchives/etherdump")
    p.add_argument("--itemlink", default="pad",
                   help="item to link to in feed, possible values: pad, text, html, dhtml; default: pad")
    p.add_argument("--chronological", default=False, action="store_true",
                   help="order chronologically, default: False")
    return p


def _load_meta(paths):
    """Return the parsed JSON of the first ``.meta.json`` file in *paths*.

    Returns None when none of the paths ends in ``.meta.json``.
    """
    for path in paths:
        if path.endswith(".meta.json"):
            with open(path) as f:
                return json.load(f)
    return None


def _fix_dates(padmeta):
    """Add parsed and formatted last-edit dates to *padmeta* and return it.

    Reads ``padmeta["lastedited_iso"]`` and stores the parsed datetime under
    ``"lastedited"`` and its formatted form under ``"lastedited_822"``.
    """
    edited = dateutil.parser.parse(padmeta["lastedited_iso"])
    padmeta["lastedited"] = edited
    # NOTE(review): the value is formatted as-is and labelled +0000; this
    # presumes lastedited_iso is already UTC -- confirm against the producer.
    padmeta["lastedited_822"] = edited.strftime(_RFC822_FORMAT)
    return padmeta


def main(args):
    """Render the ``rss.xml`` template for the given pad files and print it.

    Args:
        args: list of command-line argument strings (without the program
            name), parsed with the options defined in ``_build_parser``.

    The input filenames are grouped by their name with the etherdump
    extension removed; the ``.meta.json`` file of each group supplies the
    pad metadata. All parsed options, plus ``pads``, ``padurl`` and
    ``utcnow``, are handed to the template as variables. The rendered feed
    is UTF-8 encoded and written to standard output.
    """
    args = _build_parser().parse_args(args)

    tmpath = args.templates
    if tmpath is None:
        # Default to <parent of this file's directory>/data/templates.
        package_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        tmpath = os.path.join(package_dir, "data", "templates")

    env = Environment(loader=FileSystemLoader(tmpath))
    template = env.get_template("rss.xml")

    info = loadpadinfo(args.padinfo)

    # Sorted in place on purpose: args.input is also exposed to the template.
    inputs = args.input
    inputs.sort()

    pads = []
    for paths in group(inputs, base):
        padmeta = _load_meta(paths)
        if padmeta is None:
            # Without a .meta.json there is no date to build an item from;
            # skip the group rather than fail on a None lookup.
            print("rss: no .meta.json among {0}, skipping".format(", ".join(paths)),
                  file=sys.stderr)
            continue
        pads.append(_fix_dates(padmeta))
    args.pads = pads

    # Derive the pad base URL from the API URL. The dots are escaped so that
    # only the literal version string "1.2.9" is rewritten.
    padurlbase = re.sub(r"api/1\.2\.9/$", "p/", info["apiurl"])
    if not isinstance(padurlbase, str):
        # Python 2: re.sub returned a unicode object; encode it to bytes.
        padurlbase = padurlbase.encode("utf-8")
    args.padurl = args.padurl or padurlbase
    args.utcnow = datetime.utcnow().strftime(_RFC822_FORMAT)

    print(template.render(vars(args)).encode("utf-8"))