From 0f07ee8647cfdbcb0ed6bc3136b6f27fa6e90caf Mon Sep 17 00:00:00 2001 From: Michael Murtaugh Date: Fri, 21 Oct 2016 13:04:15 +0200 Subject: [PATCH] changes, requests for settext with long texts --- README.md | 1 + etherdump/commands/html5tidy.py | 2 +- etherdump/commands/pull.py | 4 ++-- etherdump/commands/settext.py | 19 +++++++++++++++---- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 04b718a..297059b 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ Requirements ------------- * html5lib + * requests (settext) * python-datutil, jinja2 (index subcommand) diff --git a/etherdump/commands/html5tidy.py b/etherdump/commands/html5tidy.py index 6f5a574..2c643a5 100644 --- a/etherdump/commands/html5tidy.py +++ b/etherdump/commands/html5tidy.py @@ -140,7 +140,7 @@ def main (args): else: fin = sys.stdin - doc = parse(fin, namespaceHTMLElements=False) + doc = parse(fin, treebuilder="etree", namespaceHTMLElements=False) if fin != sys.stdin: fin.close() html5tidy(doc, scripts=args.script, links=links, title=args.title, indent=args.indent) diff --git a/etherdump/commands/pull.py b/etherdump/commands/pull.py index 58363fb..20d3bca 100644 --- a/etherdump/commands/pull.py +++ b/etherdump/commands/pull.py @@ -189,7 +189,7 @@ def main (args): html = html['data']['html'] ver["path"] = p+".diff.html" ver["url"] = quote(ver["path"]) - doc = html5lib.parse(html.encode("utf-8"), encoding="utf-8", namespaceHTMLElements=False) + doc = html5lib.parse(html.encode("utf-8"), treebuilder="etree", encoding="utf-8", namespaceHTMLElements=False) html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links) with open(ver["path"], "w") as f: # f.write(html.encode("utf-8")) @@ -205,7 +205,7 @@ def main (args): html = html['data']['html'] ver["path"] = p+".raw.html" ver["url"] = quote(ver["path"]) - doc = html5lib.parse(html, namespaceHTMLElements=False) + doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False) html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links) with open(ver["path"], "w") as f: # f.write(html.encode("utf-8")) diff --git a/etherdump/commands/settext.py b/etherdump/commands/settext.py index 79f950f..2ffe1f7 100644 --- a/etherdump/commands/settext.py +++ b/etherdump/commands/settext.py @@ -4,16 +4,20 @@ from argparse import ArgumentParser import json, sys from urllib import urlencode from urllib2 import urlopen, HTTPError, URLError +import requests +LIMIT_BYTES = 100*1000 + def main(args): p = ArgumentParser("calls the getText API function for the given padid") p.add_argument("padid", help="the padid") p.add_argument("--text", default=None, help="text, default: read from stdin") p.add_argument("--padinfo", default=".etherdump/settings.json", help="settings, default: .etherdump/settings.json") p.add_argument("--showurl", default=False, action="store_true") - p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") + # p.add_argument("--format", default="text", help="output format, can be: text, json; default: text") p.add_argument("--create", default=False, action="store_true", help="flag to create pad if necessary") + p.add_argument("--limit", default=False, action="store_true", help="limit text to 100k (etherpad limit)") args = p.parse_args(args) with open(args.padinfo) as f: @@ -31,19 +35,26 @@ def main(args): # print json.dumps(results, indent=2) if results['code'] != 0: createPad = True + if args.text: text = args.text else: text = sys.stdin.read() + + if len(text) > LIMIT_BYTES and args.limit: + print "limiting", len(text), LIMIT_BYTES + text = text[:LIMIT_BYTES] + data['text'] = text if createPad: - requesturl = apiurl+'createPad?'+urlencode(data) + requesturl = apiurl+'createPad' else: - requesturl = apiurl+'setText?'+urlencode(data) + requesturl = apiurl+'setText' if args.showurl: print requesturl else: - results = json.load(urlopen(requesturl)) + results = requests.post(requesturl, data=data) # json.load(urlopen(requesturl)) + results = json.loads(results.text) print json.dumps(results, indent=2)