#!/usr/bin/env python
"""Group filenames that share a base name and pretty-print the groups.

A filename's base name is the filename with one trailing ``.html``,
``.diff.html``, ``.meta.json`` or ``.txt`` suffix removed.
"""

from argparse import ArgumentParser
from collections import defaultdict
import json
import os
from pprint import pprint
import re

# The urllib names moved in Python 3; try the new locations first and fall
# back to the Python 2 modules so the script stays importable on both.
try:
    from urllib.parse import urlencode
    from urllib.request import urlopen
    from urllib.error import HTTPError, URLError
except ImportError:
    from urllib import urlencode
    from urllib2 import urlopen, HTTPError, URLError

# One recognised suffix, anchored at the end of the name.  The alternatives
# are wrapped in a single group so that ``$`` applies to every one of them,
# not just the last.
_SUFFIX_RE = re.compile(r"\.(?:diff\.html|meta\.json|html|txt)$")


def group(items, key=lambda x: x):
    """Partition *items* into sorted lists of items sharing the same key.

    Args:
        items: iterable of mutually sortable values.
        key: callable returning a hashable, sortable grouping key for an
            item; defaults to the item itself.

    Returns:
        A list of lists.  The groups are ordered by their key and the
        members of each group are sorted.
    """
    buckets = defaultdict(list)
    for item in items:
        buckets[key(item)].append(item)
    return [sorted(buckets[k]) for k in sorted(buckets)]


def _base(name):
    """Return *name* with one recognised trailing suffix removed."""
    return _SUFFIX_RE.sub("", name)


def main(args):
    """Parse filename arguments and pretty-print them grouped by base name.

    Args:
        args: list of command-line argument strings (without the program
            name); at least one filename is required.
    """
    p = ArgumentParser("")
    p.add_argument("input", nargs="+", help="filenames")
    parsed = p.parse_args(args)

    # group() sorts both the keys and the members, so no pre-sort is needed.
    pprint(group(parsed.input, _base))


# NOTE(review): nothing visible in this view calls main(); confirm how the
# script is meant to be invoked before adding an entry-point guard.