#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import sys , json , re , os
from datetime import datetime
from urllib import urlencode
from urllib2 import urlopen , HTTPError , URLError
from math import ceil , floor
from common import *
"""
sync (meta):
Update the metadata files of pads that have changed.
Changed pads are detected by comparing each pad's current revision count with the one stored in its existing .meta.json file.
"""
def jsonload(url):
    """Fetch *url* and return its body decoded from JSON.

    The response object is closed even when reading the body fails, so a
    broken transfer does not leave the handle open.

    :param url: address passed unchanged to ``urlopen``.
    :returns: the object produced by ``json.loads`` for the response body.
    :raises: errors from ``urlopen``, ``read`` or ``json.loads`` are not
        caught here and propagate to the caller.
    """
    response = urlopen(url)
    try:
        payload = response.read()
    finally:
        # Spelled out with try/finally rather than ``with``: on Python 2 the
        # object returned by urllib2.urlopen is not a context manager.
        response.close()
    return json.loads(payload)
def load_padinfo(p):
    """Return the JSON-decoded contents of the settings file at path *p*."""
    with open(p) as settings_file:
        return json.load(settings_file)
def _api_data(info, call, params):
    """Call one API endpoint and return the data member of its JSON reply.

    *call* is the endpoint fragment appended to the configured base URL and
    *params* is url-encoded and appended as the query string.
    """
    return jsonload(info[' apiurl '] + call + urlencode(params))[' data ']


def _write_utf8(path, text):
    """Write *text* to *path*, encoded before writing."""
    with open(path, " w ") as out:
        out.write(text.encode(" utf-8 "))


def main(args):
    """Report pads whose revision count changed and optionally download them.

    For every pad id (given on the command line, or otherwise every id
    returned by the ``listAllPads`` call) the revision count reported by the
    API is compared with the one stored in the pad's existing meta file.
    Pads with an unchanged count are skipped, as are pads with zero revisions
    unless ``--zerorevs`` is given. Each remaining pad id is printed to
    stdout and, depending on the ``--meta/--text/--html/--dhtml/--all``
    switches, its files are (re)written next to the meta file.

    Progress and the final count of changed pads go to stderr.

    :param args: list of command-line argument strings, handed to
        ``ArgumentParser.parse_args``.
    :returns: None.
    :raises: an ``HTTPError`` during the metadata calls is retried (three
        attempts in total) and then the pad is skipped; other errors, and
        errors during the download calls, propagate.

    NOTE(review): ``padpath`` and ``splitpadname`` are not defined in this
    file; presumably they come from ``from common import *`` -- confirm.
    NOTE(review): string literals are reproduced exactly as found, including
    their leading/trailing spaces; those look like extraction artefacts --
    confirm against the upstream file before running.
    """
    parser = ArgumentParser(" Check for pads that have changed since last sync (according to .meta.json) ")
    parser.add_argument(" padid ", nargs=" * ", default=[])
    parser.add_argument(" --padinfo ", default=" .etherdump/settings.json ", help=" settings, default: .etherdump/settings.json ")
    parser.add_argument(" --zerorevs ", default=False, action=" store_true ", help=" include pads with zero revisions, default: False (i.e. pads with no revisions are skipped) ")
    parser.add_argument(" --pub ", default=" . ", help=" folder to store files for public pads, default: pub ")
    parser.add_argument(" --group ", default=" g ", help=" folder to store files for group pads, default: g ")
    parser.add_argument(" --skip ", default=None, type=int, help=" skip this many items, default: None ")
    parser.add_argument(" --meta ", default=False, action=" store_true ", help=" download meta to PADID.meta.json, default: False ")
    parser.add_argument(" --text ", default=False, action=" store_true ", help=" download text to PADID.txt, default: False ")
    parser.add_argument(" --html ", default=False, action=" store_true ", help=" download html to PADID.html, default: False ")
    parser.add_argument(" --dhtml ", default=False, action=" store_true ", help=" download dhtml to PADID.dhtml, default: False ")
    parser.add_argument(" --all ", default=False, action=" store_true ", help=" download all files (meta, text, html, dhtml), default: False ")
    args = parser.parse_args(args)

    info = load_padinfo(args.padinfo)
    # Query parameters shared by every API call; ' padID ' is filled in per pad.
    data = {}
    data[' apikey '] = info[' apikey ']

    if args.padid:
        padids = args.padid
    else:
        padids = _api_data(info, ' listAllPads? ', data)[' padIDs ']
    padids.sort()
    numpads = len(padids)
    count = 0
    # The options do not change while looping, so decide once whether any
    # file will be written at all.
    want_files = args.all or (args.meta or args.text or args.html or args.dhtml)

    for i, padid in enumerate(padids):
        if args.skip is not None and i < args.skip:
            continue

        # Progress line on stderr: a 20-step bar followed by position and id.
        fraction = float(i) / numpads
        bars = int(ceil(fraction * 20))
        bar = (" * " * bars) + (" - " * (20 - bars))
        msg = u" \r {0} {1} / {2} {3} ... ".format(bar, (i + 1), numpads, padid)
        sys.stderr.write(msg.encode(" utf-8 "))
        sys.stderr.flush()

        data[' padID '] = padid.encode(" utf-8 ")
        pathbase = padpath(padid, args.pub, args.group)
        metapath = pathbase + " .meta.json "
        revisions = None
        tries = 1
        skip = False
        while True:
            try:
                if os.path.exists(metapath):
                    with open(metapath) as f:
                        meta = json.load(f)
                    revisions = _api_data(info, ' getRevisionsCount? ', data)[' revisions ']
                    if meta[' revisions '] == revisions:
                        # Nothing changed since the stored meta was written.
                        skip = True
                        break

                meta = {' padid ': padid.encode(" utf-8 ")}
                if revisions is None:
                    meta[' revisions '] = _api_data(info, ' getRevisionsCount? ', data)[' revisions ']
                else:
                    # Reuse the count fetched for the comparison above.
                    meta[' revisions '] = revisions

                if (meta[' revisions '] == 0) and (not args.zerorevs):
                    skip = True
                    break

                # todo: load more metadata!
                meta[' pad '], meta[' group '] = splitpadname(padid)
                meta[' pathbase '] = pathbase
                meta[' lastedited_raw '] = int(_api_data(info, ' getLastEdited? ', data)[' lastEdited '])
                # The raw value is divided by 1000 before being treated as a
                # timestamp in seconds.
                meta[' lastedited_iso '] = datetime.fromtimestamp(int(meta[' lastedited_raw ']) / 1000).isoformat()
                meta[' author_ids '] = _api_data(info, ' listAuthorsOfPad? ', data)[' authorIDs ']
                break
            except HTTPError:
                tries += 1
                if tries > 3:
                    print(" Too many failures ( {0} ), skipping ".format(padid).encode(" utf-8 "), file=sys.stderr)
                    skip = True
                    break

        if skip:
            continue

        count += 1
        print(padid.encode(" utf-8 "))

        if want_files:
            outdir = os.path.split(metapath)[0]
            if outdir:
                try:
                    os.makedirs(outdir)
                except OSError:
                    # An already existing directory is the expected case;
                    # anything else (e.g. permissions) should not be hidden.
                    if not os.path.isdir(outdir):
                        raise

        if args.all or args.meta:
            with open(metapath, " w ") as f:
                json.dump(meta, f)

        # Process text, html, dhtml, all options
        if args.all or args.text:
            _write_utf8(pathbase + " .txt ", _api_data(info, ' getText? ', data)[' text '])

        if args.all or args.html:
            _write_utf8(pathbase + " .html ", _api_data(info, ' getHTML? ', data)[' html '])

        if args.all or args.dhtml:
            # Work on a copy so the extra parameter is sent only with this
            # call and does not leak into the requests for later pads.
            diff_params = dict(data)
            diff_params[' startRev '] = " 0 "
            _write_utf8(pathbase + " .diff.html ", _api_data(info, ' createDiffHTML? ', diff_params)[' html '])

    print(" \n {0} pad(s) changed ".format(count), file=sys.stderr)