|
|
@ -15,7 +15,6 @@ from xml.etree import ElementTree as ET |
|
|
|
import asks |
|
|
|
import html5lib |
|
|
|
import trio |
|
|
|
from tqdm import tqdm |
|
|
|
|
|
|
|
from etherpump.commands.common import * # noqa |
|
|
|
from etherpump.commands.html5tidy import html5tidy |
|
|
@ -446,16 +445,6 @@ async def handle_pad(args, padid, data, info, session): |
|
|
|
await f.write(json.dumps(meta)) |
|
|
|
|
|
|
|
|
|
|
|
async def handle_pad_chunk(args, padids, data, info, session):
    """Process one chunk of pads sequentially behind a tqdm progress bar.

    Every pad id in ``padids`` is awaited in turn through ``handle_pad``,
    which receives the same ``args``, ``data``, ``info`` and ``session``
    for each pad. The progress bar is disabled when ``istty()`` is falsy.
    """
    # Only pass ``disable`` when it is needed, so tqdm's own default
    # applies whenever output is attached to a terminal.
    bar_options = {"disable": True} if not istty() else {}
    progress = tqdm(iterable=padids, total=len(padids), **bar_options)
    for pad_id in progress:
        await handle_pad(args, pad_id, data, info, session)
|
|
|
|
|
|
|
|
|
|
|
async def handle_pads(args): |
|
|
|
session = asks.Session(connections=args.connection) |
|
|
|
info = loadpadinfo(args.padinfo) |
|
|
@ -464,12 +453,11 @@ async def handle_pads(args): |
|
|
|
padids = await get_padids(args, info, data, session) |
|
|
|
if args.skip: |
|
|
|
padids = padids[args.skip : len(padids)] |
|
|
|
CHUNK_SIZE = math.ceil(len(padids) / 3) |
|
|
|
|
|
|
|
async with trio.open_nursery() as nursery: |
|
|
|
for padids in chunks(padids, CHUNK_SIZE): |
|
|
|
_args = (args, padids, data, info, session) |
|
|
|
nursery.start_soon(handle_pad_chunk, *_args) |
|
|
|
print("Processing {} pads now...".format(len(padids))) |
|
|
|
for padid in padids: |
|
|
|
nursery.start_soon(handle_pad, args, padid, data, info, session) |
|
|
|
|
|
|
|
|
|
|
|
def main(args): |
|
|
|