"""\ Performance comparison for the xml.dom.expatbuilder DOM loader. Usage: %(program)s [-f file] [-p] [file] -f file Read the document to use from `file'. -p Enable profiling support. file Read the document to use from `file'. Use this or the `-f' option; not both. """ import getopt import os import sys import time from xml.dom import minidom, expatbuilder # XXX What's the right mix of markup items to text? What type of # XXX markup items? ## FRAGMENT = '''\ ## ## ## ## ''' FRAGMENT = '''\ This is < sample > text. ''' CHUNKS = 12000 LOGFILE = "hotshot.log" first = 1 chunks = CHUNKS if sys.argv[1:]: try: chunks = int(sys.argv[-1]) except ValueError: pass else: del sys.argv[-1] def timeit(parsefunc, src): global first if first: print "Document source contains", len(src), "bytes." first = 0 modname = parsefunc.func_globals["__name__"] t1 = time.time() doc = parsefunc(src) t2 = time.time() doc.unlink() print ("using %s.parseString():" % modname), t2 - t1 return t2 - t1 def usage(err=None, rc=0): program = os.path.basename(sys.argv[0]) if rc: f = sys.stderr else: f = sys.stdout if err: print >>f, "%s: %s" % (program, err) print >>f print >>f, __doc__ % {"program": program} sys.exit(rc) do_profile = 0 filename = None opts, args = getopt.getopt(sys.argv[1:], "f:hp", ["file=", "help", "profile="]) for opt, arg in opts: if opt in ('-f', '--file'): if filename is not None: usage("`-f' argument may only be given once", rc=2) if args: usage("`-f' and additional file argument are not compatible", rc=2) filename = arg elif opt in ('-h', '--help'): usage() elif opt == '-p': do_profile = 1 elif opt == '--profile': do_profile = 1 LOGFILE = arg if len(args) > 1: usage("at most on file argument can be used", rc=2) if args: filename = args[0] if filename is not None: src = open(filename, 'rb') else: src = "%s" % (FRAGMENT * chunks) timeit(minidom.parseString, src) timeit(expatbuilder.parseString, src) if sys.argv[1:] == ["-p"]: if os.path.exists(LOGFILE): os.unlink(LOGFILE) import hotshot import hotshot.stats def profile(*args, **kw): profiler = hotshot.Profile(LOGFILE) src = "%s" % (FRAGMENT * chunks) profiler.runcall(expatbuilder.parseString, src, *args, **kw) profiler.close() stats = hotshot.stats.load(LOGFILE) stats.strip_dirs() stats.sort_stats('calls', 'time') stats.print_stats(20) profile()