etherpump/etherpump/commands/pull.py

import json
import os
import re
import sys
from argparse import ArgumentParser
from datetime import datetime
from fnmatch import fnmatch
from time import sleep
from urllib.parse import quote, urlencode
from urllib.request import HTTPError, URLError, urlopen
from xml.etree import ElementTree as ET

import html5lib

from etherpump.commands.common import *
from etherpump.commands.html5tidy import html5tidy


"""
pull(meta):
    Update the metadata files of pads that have changed.
    Changed pads are detected by comparing each pad's revision count with the
    count stored in its existing .meta.json file.


todo...
use/prefer public interfaces ? (export functions)


"""


def try_deleting(files):
    """Remove every path in *files* on a best-effort basis.

    Any ``OSError`` raised by ``os.remove`` (for example because the file
    does not exist) is ignored, so one failing path never prevents the
    remaining paths from being attempted.

    Args:
        files: iterable of file system paths to delete.
    """
    for path in files:
        try:
            os.remove(path)
        except OSError:
            # Deliberately ignored: callers only need the file to be gone
            # if it was there and removable.
            continue


def _build_parser():
    """Build the command-line parser for the ``pull`` command."""
    # The text is passed as ``description``: the first positional parameter
    # of ArgumentParser is ``prog``, which would print this sentence as the
    # program name in usage messages.
    parser = ArgumentParser(
        description="Check for pads that have changed since last sync (according to .meta.json)"
    )

    parser.add_argument("padid", nargs="*", default=[])
    parser.add_argument(
        "--glob", default=False, help="download pads matching a glob pattern"
    )

    parser.add_argument(
        "--padinfo",
        default=".etherpump/settings.json",
        help="settings, default: .etherpump/settings.json",
    )
    parser.add_argument(
        "--zerorevs",
        default=False,
        action="store_true",
        help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)",
    )
    parser.add_argument(
        "--pub",
        default="p",
        help="folder to store files for public pads, default: p",
    )
    parser.add_argument(
        "--group",
        default="g",
        help="folder to store files for group pads, default: g",
    )
    parser.add_argument(
        "--skip",
        default=None,
        type=int,
        help="skip this many items, default: None",
    )
    parser.add_argument(
        "--meta",
        default=False,
        action="store_true",
        help="download meta to PADID.meta.json, default: False",
    )
    parser.add_argument(
        "--text",
        default=False,
        action="store_true",
        help="download text to PADID.txt, default: False",
    )
    parser.add_argument(
        "--html",
        default=False,
        action="store_true",
        help="download html to PADID.html, default: False",
    )
    parser.add_argument(
        "--dhtml",
        default=False,
        action="store_true",
        help="download dhtml to PADID.diff.html, default: False",
    )
    parser.add_argument(
        "--all",
        default=False,
        action="store_true",
        help="download all files (meta, text, html, dhtml), default: False",
    )
    parser.add_argument(
        "--folder",
        default=False,
        action="store_true",
        help="dump files in a folder named PADID (meta, text, html, dhtml), default: False",
    )
    parser.add_argument(
        "--output",
        default=False,
        action="store_true",
        help="output changed padids on stdout",
    )
    parser.add_argument(
        "--force",
        default=False,
        action="store_true",
        help="reload, even if revisions count matches previous",
    )
    parser.add_argument(
        "--no-raw-ext",
        default=False,
        action="store_true",
        help="save plain text as padname with no (additional) extension",
    )
    parser.add_argument(
        "--fix-names",
        default=False,
        action="store_true",
        help="normalize padid's (no spaces, special control chars) for use in file names",
    )

    # NOTE: --filter-ext is accepted but not read anywhere in this command.
    parser.add_argument(
        "--filter-ext", default=None, help="filter pads by extension"
    )

    parser.add_argument(
        "--css",
        default="/styles.css",
        help="add css url to output pages, default: /styles.css",
    )
    parser.add_argument(
        "--script",
        default="/versions.js",
        help="add script url to output pages, default: /versions.js",
    )

    parser.add_argument(
        "--nopublish",
        default="__NOPUBLISH__",
        help="no publish magic word, default: __NOPUBLISH__",
    )
    parser.add_argument(
        "--publish",
        default="__PUBLISH__",
        help="the publish magic word, default: __PUBLISH__",
    )
    parser.add_argument(
        "--publish-opt-in",
        default=False,
        action="store_true",
        help="ensure `--publish` is honoured instead of `--nopublish`",
    )
    return parser


def _api_call(info, method, params):
    """Call one API method on ``info['localapiurl']`` and return getjson's result."""
    return getjson(info['localapiurl'] + method + '?' + urlencode(params))


def _purge_outputs(pathbase, raw_ext):
    """Delete every output file this command may have written for one pad."""
    try_deleting(
        (
            pathbase + raw_ext,
            pathbase + ".raw.html",
            pathbase + ".diff.html",
            pathbase + ".meta.json",
        )
    )


def _fetch_meta(info, data, args, padid, pathbase, metapath, padurlbase):
    """Collect the metadata of one pad, or return None if it must be skipped.

    A pad is skipped when its revision count matches the one stored in an
    existing meta file (unless ``--force``), when it has zero revisions
    (unless ``--zerorevs``), after three attempts failed with HTTPError, or
    when a TypeError is raised while reading the API responses.
    """
    meta = {}
    revisions = None
    tries = 1

    while True:
        try:
            if os.path.exists(metapath):
                with open(metapath) as f:
                    meta.update(json.load(f))
                revisions = _api_call(info, 'getRevisionsCount', data)[
                    'data'
                ]['revisions']
                if meta['revisions'] == revisions and not args.force:
                    return None

            meta['padid'] = padid
            # The first version entry always points at the live pad.
            meta["versions"] = [
                {
                    "url": padurlbase + quote(padid),
                    "type": "pad",
                    "code": 200,
                }
            ]

            if revisions is None:
                meta['revisions'] = _api_call(
                    info, 'getRevisionsCount', data
                )['data']['revisions']
            else:
                meta['revisions'] = revisions

            if meta['revisions'] == 0 and not args.zerorevs:
                return None

            # todo: load more metadata!
            meta['group'], meta['pad'] = splitpadname(padid)
            meta['pathbase'] = pathbase
            meta['lastedited_raw'] = int(
                _api_call(info, 'getLastEdited', data)['data']['lastEdited']
            )
            # lastedited_raw is divided by 1000 before being handed to
            # fromtimestamp (which takes seconds).
            meta['lastedited_iso'] = datetime.fromtimestamp(
                meta['lastedited_raw'] / 1000
            ).isoformat()
            meta['author_ids'] = _api_call(info, 'listAuthorsOfPad', data)[
                'data'
            ]['authorIDs']
            return meta
        except HTTPError:
            tries += 1
            if tries > 3:
                print(
                    "Too many failures ({0}), skipping".format(padid),
                    file=sys.stderr,
                )
                return None
            sleep(3)
        except TypeError:
            print(
                "Type Error loading pad {0} (phantom pad?), skipping".format(
                    padid
                ),
                file=sys.stderr,
            )
            return None


def _version_links(args, padurl, padid, raw_ext):
    """Build the list of link dicts handed to html5tidy for one pad."""
    links = []
    if args.css:
        links.append({"href": args.css, "rel": "stylesheet"})
    links.append(
        {
            "href": padurl,
            "rel": "alternate",
            "type": "text/html",
            "title": "Etherpad",
        }
    )
    # todo, make this process reflect which files actually were made
    versionbaseurl = quote(padid)
    alternates = (
        (args.text, raw_ext, "text/plain", "Plain text"),
        (args.html, ".raw.html", "text/html", "HTML"),
        (args.dhtml, ".diff.html", "text/html", "HTML with author colors"),
        (args.meta, ".meta.json", "application/json", "Meta data"),
    )
    for wanted, ext, mimetype, title in alternates:
        if args.all or wanted:
            links.append(
                {
                    "href": versionbaseurl + ext,
                    "rel": "alternate",
                    "type": mimetype,
                    "title": title,
                }
            )
    return links


def _write_tidied_html(html, path, title, scripts, links):
    """Parse *html*, run html5tidy over it and write the result to *path*."""
    doc = html5lib.parse(
        html, treebuilder="etree", namespaceHTMLElements=False
    )
    html5tidy(doc, indent=True, title=title, scripts=scripts, links=links)
    # Serialize before opening the file so a failure leaves no empty file.
    markup = ET.tostring(doc, method="html", encoding="unicode")
    with open(path, "w") as f:
        print(markup, file=f)


def main(args):
    """Pull changed pads and write the requested output files.

    For every selected pad the revision count is compared with the one stored
    in the pad's existing ``.meta.json``; unchanged pads are skipped unless
    ``--force`` is given. For the remaining pads the text, HTML, diff HTML
    and meta files are written according to ``--text``, ``--html``,
    ``--dhtml``, ``--meta`` or ``--all``.

    The publish magic words are only checked against the pad text, i.e. only
    when ``--text`` or ``--all`` is given.

    Args:
        args: list of command-line argument strings, handed to
            ``ArgumentParser.parse_args``.
    """
    args = _build_parser().parse_args(args)

    raw_ext = "" if args.no_raw_ext else ".raw.txt"

    info = loadpadinfo(args.padinfo)
    data = {'apikey': info['apikey']}

    if args.padid:
        padids = args.padid
    else:
        padids = _api_call(info, 'listAllPads', data)['data']['padIDs']
        if args.glob:
            padids = [x for x in padids if fnmatch(x, args.glob)]
    padids.sort()
    numpads = len(padids)

    # Loop invariants: the public pad URL prefix and whether any file output
    # was requested at all.
    padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])
    wants_files = (
        args.all or args.meta or args.text or args.html or args.dhtml
    )

    count = 0
    for i, padid in enumerate(padids):
        if args.skip is not None and i < args.skip:
            continue
        progressbar(i, numpads, padid)

        data['padID'] = padid
        pathbase = padpath(padid, args.pub, args.group, args.fix_names)
        if args.folder:
            pathbase = os.path.join(pathbase, padid)
        metapath = pathbase + ".meta.json"

        meta = _fetch_meta(
            info, data, args, padid, pathbase, metapath, padurlbase
        )
        if meta is None:
            continue
        versions = meta["versions"]

        count += 1

        if args.output:
            print(padid)

        if wants_files:
            outdir = os.path.dirname(metapath)
            # An empty dirname means the current directory: nothing to create.
            if outdir:
                os.makedirs(outdir, exist_ok=True)

        if args.all or args.text:
            response = _api_call(info, 'getText', data)
            ver = {"type": "text"}
            versions.append(ver)
            ver["code"] = response["_code"]
            if response["_code"] == 200:
                text = response['data']['text']

                ##########################################
                ## ENFORCE __NOPUBLISH__ MAGIC WORD
                ##########################################
                if args.nopublish and args.nopublish in text:
                    # NEED TO PURGE ANY EXISTING DOCS
                    _purge_outputs(pathbase, raw_ext)
                    continue

                ##########################################
                ## ENFORCE __PUBLISH__ MAGIC WORD
                ##########################################
                if args.publish_opt_in and args.publish not in text:
                    _purge_outputs(pathbase, raw_ext)
                    continue

                ver["path"] = pathbase + raw_ext
                ver["url"] = quote(ver["path"])
                with open(ver["path"], "w") as f:
                    f.write(text)
                # once the content is settled, compute a hash
                # and link it in the metadata!

        links = _version_links(args, versions[0]["url"], padid, raw_ext)

        if args.all or args.dhtml:
            # startRev goes into a per-call copy so it is not sent along with
            # the other API calls that share ``data``.
            response = _api_call(
                info, 'createDiffHTML', dict(data, startRev="0")
            )
            ver = {"type": "diffhtml"}
            versions.append(ver)
            ver["code"] = response["_code"]
            if response["_code"] == 200:
                path = pathbase + ".diff.html"
                try:
                    _write_tidied_html(
                        response['data']['html'],
                        path,
                        padid,
                        args.script,
                        links,
                    )
                except TypeError:
                    # Malformed / incomplete response, record the message
                    # (such as "internal error") in the metadata and write
                    # NO file! The message is read from the untouched
                    # response, so this handler cannot itself raise.
                    ver["message"] = response.get("message")
                else:
                    ver["path"] = path
                    ver["url"] = quote(path)

        if args.all or args.html:
            response = _api_call(info, 'getHTML', data)
            ver = {"type": "html"}
            versions.append(ver)
            ver["code"] = response["_code"]
            if response["_code"] == 200:
                html = response['data']['html']
                ver["path"] = pathbase + ".raw.html"
                ver["url"] = quote(ver["path"])
                _write_tidied_html(
                    html, ver["path"], padid, args.script, links
                )

        # output meta
        if args.all or args.meta:
            # Appended before dumping so the meta file lists itself.
            versions.append(
                {"type": "meta", "path": metapath, "url": quote(metapath)}
            )
            with open(metapath, "w") as f:
                json.dump(meta, f, indent=2)

    print("\n{0} pad(s) loaded".format(count), file=sys.stderr)
Add maintenance tools and run them 5 years ago			`import json`
			`import os`
			`import re`
			`import sys`
make file friendliness 9 years ago			`from argparse import ArgumentParser`
			`from datetime import datetime`
Add maintenance tools and run them 5 years ago			`from fnmatch import fnmatch`
			`from time import sleep`
			`from urllib.parse import quote, urlencode`
			`from urllib.request import HTTPError, URLError, urlopen`
			`from xml.etree import ElementTree as ET`
python3 6 years ago
Add maintenance tools and run them 5 years ago			`import html5lib`
python3 6 years ago
renaming all files to etherpump + adding a etherpump readme 5 years ago			`from etherpump.commands.common import *`
			`from etherpump.commands.html5tidy import html5tidy`
add --glob option to pull 6 years ago
make file friendliness 9 years ago
			`"""`
changes 9 years ago			`pull(meta):`
make file friendliness 9 years ago			`Update meta data files for those that have changed.`
			`Check for changed pads by looking at revisions & comparing to existing`

new 9 years ago
			`todo...`
			`use/prefer public interfaces ? (export functions)`


make file friendliness 9 years ago			`"""`

Add maintenance tools and run them 5 years ago
			`def try_deleting(files):`
NOPUBLISH seems to work 8 years ago			`for f in files:`
			`try:`
			`os.remove(f)`
			`except OSError as e:`
			`pass`

Add maintenance tools and run them 5 years ago
			`def main(args):`
			`p = ArgumentParser(`
			`"Check for pads that have changed since last sync (according to .meta.json)"`
			`)`
add --glob option to pull 6 years ago
added padids for fine-grained syn 9 years ago			`p.add_argument("padid", nargs="*", default=[])`
Add maintenance tools and run them 5 years ago			`p.add_argument(`
			`"--glob", default=False, help="download pads matching a glob pattern"`
			`)`

			`p.add_argument(`
			`"--padinfo",`
			`default=".etherpump/settings.json",`
			`help="settings, default: .etherpump/settings.json",`
			`)`
			`p.add_argument(`
			`"--zerorevs",`
			`default=False,`
			`action="store_true",`
			`help="include pads with zero revisions, default: False (i.e. pads with no revisions are skipped)",`
			`)`
			`p.add_argument(`
			`"--pub",`
			`default="p",`
			`help="folder to store files for public pads, default: p",`
			`)`
			`p.add_argument(`
			`"--group",`
			`default="g",`
			`help="folder to store files for group pads, default: g",`
			`)`
			`p.add_argument(`
			`"--skip",`
			`default=None,`
			`type=int,`
			`help="skip this many items, default: None",`
			`)`
			`p.add_argument(`
			`"--meta",`
			`default=False,`
			`action="store_true",`
			`help="download meta to PADID.meta.json, default: False",`
			`)`
			`p.add_argument(`
			`"--text",`
			`default=False,`
			`action="store_true",`
			`help="download text to PADID.txt, default: False",`
			`)`
			`p.add_argument(`
			`"--html",`
			`default=False,`
			`action="store_true",`
			`help="download html to PADID.html, default: False",`
			`)`
			`p.add_argument(`
			`"--dhtml",`
			`default=False,`
			`action="store_true",`
			`help="download dhtml to PADID.diff.html, default: False",`
			`)`
			`p.add_argument(`
			`"--all",`
			`default=False,`
			`action="store_true",`
			`help="download all files (meta, text, html, dhtml), default: False",`
			`)`
			`p.add_argument(`
			`"--folder",`
			`default=False,`
			`action="store_true",`
			`help="dump files in a folder named PADID (meta, text, html, dhtml), default: False",`
			`)`
			`p.add_argument(`
			`"--output",`
			`default=False,`
			`action="store_true",`
			`help="output changed padids on stdout",`
			`)`
			`p.add_argument(`
			`"--force",`
			`default=False,`
			`action="store_true",`
			`help="reload, even if revisions count matches previous",`
			`)`
			`p.add_argument(`
			`"--no-raw-ext",`
			`default=False,`
			`action="store_true",`
			`help="save plain text as padname with no (additional) extension",`
			`)`
			`p.add_argument(`
			`"--fix-names",`
			`default=False,`
			`action="store_true",`
			`help="normalize padid's (no spaces, special control chars) for use in file names",`
			`)`

			`p.add_argument(`
			`"--filter-ext", default=None, help="filter pads by extension"`
			`)`

			`p.add_argument(`
			`"--css",`
			`default="/styles.css",`
			`help="add css url to output pages, default: /styles.css",`
			`)`
			`p.add_argument(`
			`"--script",`
			`default="/versions.js",`
			`help="add script url to output pages, default: /versions.js",`
			`)`

			`p.add_argument(`
			`"--nopublish",`
			`default="__NOPUBLISH__",`
			`help="no publish magic word, default: __NOPUBLISH__",`
			`)`
			`p.add_argument(`
			`"--publish",`
			`default="__PUBLISH__",`
			`help="the publish magic word, default: __PUBLISH__",`
			`)`
			`p.add_argument(`
			`"--publish-opt-in",`
			`default=False,`
			`action="store_true",`
			help="ensure `--publish` is honoured instead of `--nopublish`",
			`)`
added NOPUBLISH 8 years ago
make file friendliness 9 years ago			`args = p.parse_args(args)`

added --no-raw-ext option to suppress (additional) extensions on pad ids 7 years ago			`raw_ext = ".raw.txt"`
			`if args.no_raw_ext:`
			`raw_ext = ""`

changes 9 years ago			`info = loadpadinfo(args.padinfo)`
make file friendliness 9 years ago			`data = {}`
			`data['apikey'] = info['apikey']`
added padids for fine-grained syn 9 years ago
			`if args.padid:`
add --glob option to pull 6 years ago			`padids = args.padid`
			`elif args.glob:`
Add maintenance tools and run them 5 years ago			`padids = getjson(`
			`info['localapiurl'] + 'listAllPads?' + urlencode(data)`
			`)['data']['padIDs']`
add --glob option to pull 6 years ago			`padids = [x for x in padids if fnmatch(x, args.glob)]`
added padids for fine-grained syn 9 years ago			`else:`
Add maintenance tools and run them 5 years ago			`padids = getjson(`
			`info['localapiurl'] + 'listAllPads?' + urlencode(data)`
			`)['data']['padIDs']`
make file friendliness 9 years ago			`padids.sort()`
			`numpads = len(padids)`
self contained sync command with per output options and all flag 9 years ago			`# maxmsglen = 0`
make file friendliness 9 years ago			`count = 0`
			`for i, padid in enumerate(padids):`
Add maintenance tools and run them 5 years ago			`if args.skip != None and i < args.skip:`
make file friendliness 9 years ago			`continue`
changes 9 years ago			`progressbar(i, numpads, padid)`
Remove Python 2 cruft 5 years ago
Avoid encoding issues with Python 3 5 years ago			`data['padID'] = padid`
python3 6 years ago			`p = padpath(padid, args.pub, args.group, args.fix_names)`
new 9 years ago			`if args.folder:`
Avoid encoding issues with Python 3 5 years ago			`p = os.path.join(p, padid)`
new 9 years ago
make file friendliness 9 years ago			`metapath = p + ".meta.json"`
			`revisions = None`
self contained sync command with per output options and all flag 9 years ago			`tries = 1`
			`skip = False`
new 9 years ago			`padurlbase = re.sub(r"api/1.2.9/$", "p/", info["apiurl"])`
new pull, new meta style from live constant etherdumpÄ 8 years ago			`meta = {}`
Avoid encoding issues with Python 3 5 years ago
self contained sync command with per output options and all flag 9 years ago			`while True:`
			`try:`
			`if os.path.exists(metapath):`
			`with open(metapath) as f:`
new pull, new meta style from live constant etherdumpÄ 8 years ago			`meta.update(json.load(f))`
Add maintenance tools and run them 5 years ago			`revisions = getjson(`
			`info['localapiurl']`
			`+ 'getRevisionsCount?'`
			`+ urlencode(data)`
			`)['data']['revisions']`
new 9 years ago			`if meta['revisions'] == revisions and not args.force:`
Add maintenance tools and run them 5 years ago			`skip = True`
self contained sync command with per output options and all flag 9 years ago			`break`
Remove Python 2 cruft 5 years ago
Avoid encoding issues with Python 3 5 years ago			`meta['padid'] = padid`
new pull, new meta style from live constant etherdumpÄ 8 years ago			`versions = meta["versions"] = []`
Add maintenance tools and run them 5 years ago			`versions.append(`
			`{`
			`"url": padurlbase + quote(padid),`
			`"type": "pad",`
			`"code": 200,`
			`}`
			`)`
new 9 years ago
self contained sync command with per output options and all flag 9 years ago			`if revisions == None:`
Add maintenance tools and run them 5 years ago			`meta['revisions'] = getjson(`
			`info['localapiurl']`
			`+ 'getRevisionsCount?'`
			`+ urlencode(data)`
			`)['data']['revisions']`
self contained sync command with per output options and all flag 9 years ago			`else:`
Add maintenance tools and run them 5 years ago			`meta['revisions'] = revisions`
self contained sync command with per output options and all flag 9 years ago
			`if (meta['revisions'] == 0) and (not args.zerorevs):`
			`# print("Skipping zero revs", file=sys.stderr)`
Add maintenance tools and run them 5 years ago			`skip = True`
self contained sync command with per output options and all flag 9 years ago			`break`

			`# todo: load more metadata!`
new 9 years ago			`meta['group'], meta['pad'] = splitpadname(padid)`
self contained sync command with per output options and all flag 9 years ago			`meta['pathbase'] = p`
Add maintenance tools and run them 5 years ago			`meta['lastedited_raw'] = int(`
			`getjson(`
			`info['localapiurl'] + 'getLastEdited?' + urlencode(data)`
			`)['data']['lastEdited']`
			`)`
			`meta['lastedited_iso'] = datetime.fromtimestamp(`
			`int(meta['lastedited_raw']) / 1000`
			`).isoformat()`
			`meta['author_ids'] = getjson(`
			`info['localapiurl'] + 'listAuthorsOfPad?' + urlencode(data)`
			`)['data']['authorIDs']`
self contained sync command with per output options and all flag 9 years ago			`break`
			`except HTTPError as e:`
			`tries += 1`
			`if tries > 3:`
Add maintenance tools and run them 5 years ago			`print(`
			`"Too many failures ({0}), skipping".format(padid),`
			`file=sys.stderr,`
			`)`
			`skip = True`
self contained sync command with per output options and all flag 9 years ago			`break`
new 9 years ago			`else:`
			`sleep(3)`
catch type error when pulling phantom pads 8 years ago			`except TypeError as e:`
Add maintenance tools and run them 5 years ago			`print(`
			`"Type Error loading pad {0} (phantom pad?), skipping".format(`
			`padid`
			`),`
			`file=sys.stderr,`
			`)`
			`skip = True`
catch type error when pulling phantom pads 8 years ago			`break`
self contained sync command with per output options and all flag 9 years ago
			`if skip:`
make file friendliness 9 years ago			`continue`

			`count += 1`
self contained sync command with per output options and all flag 9 years ago
new 9 years ago			`if args.output:`
Add maintenance tools and run them 5 years ago			`print(padid)`
self contained sync command with per output options and all flag 9 years ago
			`if args.all or (args.meta or args.text or args.html or args.dhtml):`
			`try:`
			`os.makedirs(os.path.split(metapath)[0])`
			`except OSError:`
			`pass`

new pull, new meta style from live constant etherdumpÄ 8 years ago			`if args.all or args.text:`
Add maintenance tools and run them 5 years ago			`text = getjson(info['localapiurl'] + 'getText?' + urlencode(data))`
new pull, new meta style from live constant etherdumpÄ 8 years ago			`ver = {"type": "text"}`
			`versions.append(ver)`
			`ver["code"] = text["_code"]`
			`if text["_code"] == 200:`
			`text = text['data']['text']`
NOPUBLISH seems to work 8 years ago
			`##########################################`
			`## ENFORCE __NOPUBLISH__ MAGIC WORD`
			`##########################################`
			`if args.nopublish and args.nopublish in text:`
			`# NEED TO PURGE ANY EXISTING DOCS`
Add maintenance tools and run them 5 years ago			`try_deleting(`
			`(`
			`p + raw_ext,`
			`p + ".raw.html",`
			`p + ".diff.html",`
			`p + ".meta.json",`
			`)`
			`)`
NOPUBLISH seems to work 8 years ago			`continue`

Add `__PUBLISH__` logic Closes https://gitlab.constantvzw.org/aa/etherdump/issues/3. This allows for the following to be run: etherdump pull --publish-opt-in --all --pub mydump And if `__PUBLISH__` is not present on the pads, then the pad will not be archived. It is also possible to configure this magic word by specifying the `--publish ...` option. 5 years ago			`##########################################`
			`## ENFORCE __PUBLISH__ MAGIC WORD`
			`##########################################`
			`if args.publish_opt_in and args.publish not in text:`
Add maintenance tools and run them 5 years ago			`try_deleting(`
			`(`
			`p + raw_ext,`
			`p + ".raw.html",`
			`p + ".diff.html",`
			`p + ".meta.json",`
			`)`
			`)`
Add `__PUBLISH__` logic Closes https://gitlab.constantvzw.org/aa/etherdump/issues/3. This allows for the following to be run: etherdump pull --publish-opt-in --all --pub mydump And if `__PUBLISH__` is not present on the pads, then the pad will not be archived. It is also possible to configure this magic word by specifying the `--publish ...` option. 5 years ago			`continue`

Add maintenance tools and run them 5 years ago			`ver["path"] = p + raw_ext`
new pull, new meta style from live constant etherdumpÄ 8 years ago			`ver["url"] = quote(ver["path"])`
			`with open(ver["path"], "w") as f:`
python3 6 years ago			`f.write(text)`
new pull, new meta style from live constant etherdumpÄ 8 years ago			`# once the content is settled, compute a hash`
			`# and link it in the metadata!`
self contained sync command with per output options and all flag 9 years ago
pull with html5tidy and version links 8 years ago			`links = []`
dump styles and versions script now parameterized to customize 8 years ago			`if args.css:`
Add maintenance tools and run them 5 years ago			`links.append({"href": args.css, "rel": "stylesheet"})`
pull with html5tidy and version links 8 years ago			`# todo, make this process reflect which files actually were made`
python3 6 years ago			`versionbaseurl = quote(padid)`
Add maintenance tools and run them 5 years ago			`links.append(`
			`{`
			`"href": versions[0]["url"],`
			`"rel": "alternate",`
			`"type": "text/html",`
			`"title": "Etherpad",`
			`}`
			`)`
Only all requested versions in links 6 years ago			`if args.all or args.text:`
Add maintenance tools and run them 5 years ago			`links.append(`
			`{`
			`"href": versionbaseurl + raw_ext,`
			`"rel": "alternate",`
			`"type": "text/plain",`
			`"title": "Plain text",`
			`}`
			`)`
Only all requested versions in links 6 years ago			`if args.all or args.html:`
Add maintenance tools and run them 5 years ago			`links.append(`
			`{`
			`"href": versionbaseurl + ".raw.html",`
			`"rel": "alternate",`
			`"type": "text/html",`
			`"title": "HTML",`
			`}`
			`)`
Only all requested versions in links 6 years ago			`if args.all or args.dhtml:`
Add maintenance tools and run them 5 years ago			`links.append(`
			`{`
			`"href": versionbaseurl + ".diff.html",`
			`"rel": "alternate",`
			`"type": "text/html",`
			`"title": "HTML with author colors",`
			`}`
			`)`
Only all requested versions in links 6 years ago			`if args.all or args.meta:`
Add maintenance tools and run them 5 years ago			`links.append(`
			`{`
			`"href": versionbaseurl + ".meta.json",`
			`"rel": "alternate",`
			`"type": "application/json",`
			`"title": "Meta data",`
			`}`
			`)`
Only all requested versions in links 6 years ago
			`# links.append({"href":"/", "rel":"search", "type":"text/html", "title":"Index"})`
pull with html5tidy and version links 8 years ago
self contained sync command with per output options and all flag 9 years ago			`if args.all or args.dhtml:`
new pull, new meta style from live constant etherdumpÄ 8 years ago			`data['startRev'] = "0"`
Add maintenance tools and run them 5 years ago			`html = getjson(`
			`info['localapiurl'] + 'createDiffHTML?' + urlencode(data)`
			`)`
new pull, new meta style from live constant etherdumpÄ 8 years ago			`ver = {"type": "diffhtml"}`
			`versions.append(ver)`
Remove Python 2 cruft 5 years ago			`ver["code"] = html["_code"]`
new pull, new meta style from live constant etherdumpÄ 8 years ago			`if html["_code"] == 200:`
updated pull to deal with errors in diffhtml output that was halting the dump process 6 years ago			`try:`
			`html = html['data']['html']`
Add maintenance tools and run them 5 years ago			`ver["path"] = p + ".diff.html"`
updated pull to deal with errors in diffhtml output that was halting the dump process 6 years ago			`ver["url"] = quote(ver["path"])`
			`# doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)`
Add maintenance tools and run them 5 years ago			`doc = html5lib.parse(`
			`html, treebuilder="etree", namespaceHTMLElements=False`
			`)`
			`html5tidy(`
			`doc,`
			`indent=True,`
			`title=padid,`
			`scripts=args.script,`
			`links=links,`
			`)`
updated pull to deal with errors in diffhtml output that was halting the dump process 6 years ago			`with open(ver["path"], "w") as f:`
Add maintenance tools and run them 5 years ago			`print(`
			`ET.tostring(doc, method="html", encoding="unicode"),`
			`file=f,`
			`)`
updated pull to deal with errors in diffhtml output that was halting the dump process 6 years ago			`except TypeError:`
			`# Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file!`
			`ver["message"] = html["message"]`
			`# with open(ver["path"], "w") as f:`
			`# print ("""<pre>{0}</pre>""".format(json.dumps(html, indent=2)), file=f)`
pull with html5tidy and version links 8 years ago
			`# Process text, html, dhtml, all options`
			`if args.all or args.html:`
Add maintenance tools and run them 5 years ago			`html = getjson(info['localapiurl'] + 'getHTML?' + urlencode(data))`
pull with html5tidy and version links 8 years ago			`ver = {"type": "html"}`
			`versions.append(ver)`
			`ver["code"] = html["_code"]`
			`if html["_code"] == 200:`
			`html = html['data']['html']`
Add maintenance tools and run them 5 years ago			`ver["path"] = p + ".raw.html"`
pull with html5tidy and version links 8 years ago			`ver["url"] = quote(ver["path"])`
Add maintenance tools and run them 5 years ago			`doc = html5lib.parse(`
			`html, treebuilder="etree", namespaceHTMLElements=False`
			`)`
			`html5tidy(`
			`doc,`
			`indent=True,`
			`title=padid,`
			`scripts=args.script,`
			`links=links,`
			`)`
new pull, new meta style from live constant etherdumpÄ 8 years ago			`with open(ver["path"], "w") as f:`
Add maintenance tools and run them 5 years ago			`print(`
			`ET.tostring(doc, method="html", encoding="unicode"),`
			`file=f,`
			`)`
new pull, new meta style from live constant etherdumpÄ 8 years ago
			`# output meta`
			`if args.all or args.meta:`
			`ver = {"type": "meta"}`
			`versions.append(ver)`
			`ver["path"] = metapath`
			`ver["url"] = quote(metapath)`
			`with open(metapath, "w") as f:`
			`json.dump(meta, f, indent=2)`
self contained sync command with per output options and all flag 9 years ago
new 9 years ago			`print("\n{0} pad(s) loaded".format(count), file=sys.stderr)`