#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import sys, json, re, os
from datetime import datetime
from urllib import urlencode
from urllib2 import HTTPError
from common import *
from time import sleep
"""
pull ( meta ) :
Update meta data files for those that have changed .
Check for changed pads by looking at revisions & comparing to existing
"""

def main(args):
    p = ArgumentParser("Check for pads that have changed since last sync (according to .meta.json)")
    p.add_argument("padid", nargs="*", default=[])
    p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json")
    p.add_argument("--zerorevs", default=False, action="store_true", help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)")
    p.add_argument("--pub", default=".", help="folder to store files for public pads, default: .")
    p.add_argument("--group", default="g", help="folder to store files for group pads, default: g")
    p.add_argument("--skip", default=None, type=int, help="skip this many items, default: None")
    p.add_argument("--meta", default=False, action="store_true", help="download meta to PADID.meta.json, default: False")
    p.add_argument("--text", default=False, action="store_true", help="download text to PADID.txt, default: False")
    p.add_argument("--html", default=False, action="store_true", help="download html to PADID.html, default: False")
    p.add_argument("--dhtml", default=False, action="store_true", help="download diff html to PADID.diff.html, default: False")
    p.add_argument("--all", default=False, action="store_true", help="download all files (meta, text, html, dhtml), default: False")
    args = p.parse_args(args)
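
    # Example invocations (assuming this module is wired up as the "pull"
    # subcommand of the etherdump CLI; adjust to how the script is actually
    # installed):
    #   pull --all              # fetch meta, text, html and diff html for changed pads
    #   pull --text somepadid   # fetch only the text of a single pad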

    info = loadpadinfo(args.padinfo)
    data = {}
    data['apikey'] = info['apikey']

    if args.padid:
        padids = args.padid
    else:
        padids = getjson(info['apiurl'] + 'listAllPads?' + urlencode(data))['data']['padIDs']
    padids.sort()
    numpads = len(padids)
    # maxmsglen = 0
    count = 0
    for i, padid in enumerate(padids):
        if args.skip is not None and i < args.skip:
            continue
        progressbar(i, numpads, padid)

        data['padID'] = padid.encode("utf-8")
        p = padpath(padid, args.pub, args.group)
        metapath = p + ".meta.json"
        revisions = None
        tries = 1
        skip = False
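
        # Retry loop: compare the revision count stored in .meta.json with the
        # live getRevisionsCount value; if they match the pad is unchanged and
        # gets skipped. API errors are retried (up to 3 failures) before giving
        # up on this pad.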
        while True:
            try:
                if os.path.exists(metapath):
                    with open(metapath) as f:
                        meta = json.load(f)
                    revisions = getjson(info['apiurl'] + 'getRevisionsCount?' + urlencode(data))['data']['revisions']
                    if meta['revisions'] == revisions:
                        skip = True
                        break

                meta = {'padid': padid.encode("utf-8")}
                if revisions is None:
                    meta['revisions'] = getjson(info['apiurl'] + 'getRevisionsCount?' + urlencode(data))['data']['revisions']
                else:
                    meta['revisions'] = revisions

                if (meta['revisions'] == 0) and (not args.zerorevs):
                    # print("Skipping zero revs", file=sys.stderr)
                    skip = True
                    break

                # todo: load more metadata!
                meta['pad'], meta['group'] = splitpadname(padid)
                meta['pathbase'] = p
                meta['lastedited_raw'] = int(getjson(info['apiurl'] + 'getLastEdited?' + urlencode(data))['data']['lastEdited'])
                meta['lastedited_iso'] = datetime.fromtimestamp(int(meta['lastedited_raw']) / 1000).isoformat()
                meta['author_ids'] = getjson(info['apiurl'] + 'listAuthorsOfPad?' + urlencode(data))['data']['authorIDs']
                break
            except HTTPError as e:
                tries += 1
                if tries > 3:
                    print("Too many failures ({0}), skipping".format(padid).encode("utf-8"), file=sys.stderr)
                    skip = True
                    break
        if skip:
            continue

        count += 1
        print(padid.encode("utf-8"))

        if args.all or (args.meta or args.text or args.html or args.dhtml):
            try:
                os.makedirs(os.path.split(metapath)[0])
            except OSError:
                pass

        if args.all or args.meta:
            with open(metapath, "w") as f:
                json.dump(meta, f)
        # Process text, html, dhtml, all options
        if args.all or args.text:
            text = getjson(info['apiurl'] + 'getText?' + urlencode(data))
            text = text['data']['text']
            with open(p + ".txt", "w") as f:
                f.write(text.encode("utf-8"))

        if args.all or args.html:
            html = getjson(info['apiurl'] + 'getHTML?' + urlencode(data))
            html = html['data']['html']
            with open(p + ".html", "w") as f:
                f.write(html.encode("utf-8"))
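
        # createDiffHTML (the full diff from revision 0) is comparatively
        # error-prone, so it gets its own retry loop: up to 5 attempts with a
        # short pause between them, and the .diff.html file is removed if the
        # call keeps failing.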
        if args.all or args.dhtml:
            tries = 0
            skip = False
            while not skip:
                try:
                    data['startRev'] = "0"
                    html = getjson(info['apiurl'] + 'createDiffHTML?' + urlencode(data))
                    html = html['data']['html']
                    with open(p + ".diff.html", "w") as f:
                        f.write(html.encode("utf-8"))
                    break
                except HTTPError as e:
                    print("HTTPERROR {0}".format(e), file=sys.stderr)
                    tries += 1
                    if tries >= 5:
                        print("Too many errors, deleting .diff.html and skipping", file=sys.stderr)
                        try:
                            os.remove(p + ".diff.html")
                        except OSError:
                            pass
                        skip = True
                    else:
                        sleep(0.1)
print ( " \n {0} pad(s) changed " . format ( count ) , file = sys . stderr )