229 lines
7.3 KiB
Python
229 lines
7.3 KiB
Python
|
# Given a path to llvm-objdump and a directory tree, spider the directory tree,
# dumping every object file encountered with the options needed to demangle
# the symbols in the object file, and collect statistics about failed / crashed
# demanglings. Useful for stress testing the demangler against a large corpus
# of inputs.
|
||
|
|
||
|
from __future__ import print_function
|
||
|
|
||
|
import argparse
|
||
|
import functools
|
||
|
import os
|
||
|
import re
|
||
|
import sys
|
||
|
import subprocess
|
||
|
import traceback
|
||
|
from multiprocessing import Pool
|
||
|
import multiprocessing
|
||
|
|
||
|
# Parsed command-line options. Assigned from `parser.parse_args()` in the
# `__main__` block and read by `go()`.
args = None
|
||
|
|
||
|
def parse_line(line):
    """Split one line of objdump output into (mangled, demangled).

    The mangled name is taken to start at the first '?' on the line and to run
    up to the first '(' that follows it.  The demangled text is whatever sits
    between that '(' and the last ')' on the line.  Both pieces are returned
    with surrounding whitespace stripped.

    Returns (None, None) when the line has no '?', no '(' after the '?', or no
    ')' after that '('.
    """
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None

    # Search from the right so that parentheses nested inside the demangled
    # text (e.g. a parameter list) stay part of it.
    close_paren = line.rfind(')', open_paren)
    if close_paren == -1:
        # Without this check the slice below would use -1 as its end index and
        # silently drop the last character of the line.
        return None, None

    mangled = line[question:open_paren]
    demangled = line[open_paren + 1:close_paren]
    return mangled.strip(), demangled.strip()
|
||
|
|
||
|
class Result(object):
    """Demangling statistics for one object file or for a whole directory."""

    def __init__(self):
        # Path of the object file (or directory) these numbers describe.
        self.file = None

        # Plain counters: files covered and symbol lines seen.
        self.nfiles = 0
        self.nsymbols = 0

        # Paths of files for which objdump exited with a nonzero status.
        self.crashed = list()

        # Mangled names that objdump reported it could not demangle.
        self.errors = set()
|
||
|
|
||
|
class MapContext(object):
    """Mutable bookkeeping carried from one processed chunk to the next."""

    def __init__(self):
        # Queue of (directory, [object file paths]) tuples still to be
        # dispatched, and the total number of paths across all of them.
        self.pending_objs = []
        self.npending = 0

        # Running total over every directory that has been fully processed.
        self.rcumulative = Result()

        # Partial tally for a directory whose files were split across chunks,
        # or None when no directory is currently half-done.
        self.rincomplete = None
|
||
|
|
||
|
def process_file(path, objdump):
    """Run objdump on one object file and tally its demangling failures.

    Args:
        path: path of the object file to dump.
        objdump: path (or name) of the llvm-objdump executable to run.

    Returns:
        A Result whose `file` is `path`.  If objdump exits with a nonzero
        status, `crashed` is `[path]` and nothing else is filled in.
        Otherwise `nsymbols` counts the output lines that `parse_line`
        recognised, and `errors` holds the mangled names whose demangled text
        contains "invalid mangled name".  `nfiles` is left at 0.
    """
    r = Result()
    r.file = path

    popen_args = [objdump, '-t', '-demangle', path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured only so that it does not clutter the report.
    stdout, _stderr = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    # Symbol tables may contain bytes that are not valid UTF-8.  Substitute
    # them instead of raising, so that one odd object file cannot abort the
    # whole run.
    output = stdout.decode('utf-8', 'replace')

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r
|
||
|
|
||
|
def add_results(r1, r2):
    """Fold the statistics held by `r2` into `r1`.

    `r1` is modified in place; `r2` is left untouched.  Nothing is returned.
    """
    # Scalar counters simply add up.
    r1.nfiles = r1.nfiles + r2.nfiles
    r1.nsymbols = r1.nsymbols + r2.nsymbols

    # The collections are merged into r1's existing objects: the error names
    # are unioned, the crashed paths are appended.
    r1.errors.update(r2.errors)
    r1.crashed += r2.crashed
|
||
|
|
||
|
def print_result_row(directory, result):
    """Print a one-line summary of `result`, labelled with `directory`."""
    ncrashes = len(result.crashed)
    nerrors = len(result.errors)
    row = "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
        result.nfiles, ncrashes, nerrors, result.nsymbols, directory)
    print(row)
|
||
|
|
||
|
def process_one_chunk(pool, chunk_size, objdump, context):
    """Dispatch up to `chunk_size` queued object files and record the results.

    Files are taken from the front of `context.pending_objs`, one directory at
    a time, until `chunk_size` files have been gathered or the queue is empty.
    They are run through `process_file` via `pool.map`, and each result is
    added to the tally of the directory the file was taken from.

    Every directory whose files have all been processed is added to
    `context.rcumulative` and gets a status row printed.  If the chunk filled
    up part-way through a directory, that directory's partial tally is parked
    in `context.rincomplete` and picked up again by the next call.

    Args:
        pool: object providing `map(func, iterable)`, e.g. a multiprocessing
            Pool.
        chunk_size: maximum number of files to dispatch in this call.
        objdump: path of the llvm-objdump executable, forwarded to
            `process_file`.
        context: the MapContext holding the pending queue and running totals.
    """
    objs = []
    # Directory each entry of `objs` was queued under, index-aligned with
    # `objs`.  Kept explicitly because recovering it with os.path.dirname()
    # fails for a root of '.': the normalised file paths are then bare names
    # whose dirname is '' rather than '.'.
    obj_dirs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir, objs_this_dir = context.pending_objs[0]
        ordered_dirs.append(this_dir)

        # Continue the tally of a directory left half-done by the previous
        # chunk, if there is one; otherwise start from scratch.
        dir_result = context.rincomplete
        if dir_result is None:
            dir_result = Result()
        else:
            context.rincomplete = None

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        navail = len(objs_this_dir)
        ntaken = min(chunk_size - len(objs), navail)
        objs.extend(objs_this_dir[:ntaken])
        obj_dirs.extend([this_dir] * ntaken)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            # The chunk is full; leave the rest of this directory queued.
            context.pending_objs[0] = (this_dir, objs_this_dir[ntaken:])
            incomplete = True

        dir_result.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    worker = functools.partial(process_file, objdump=objdump)
    mapped_results = pool.map(worker, objs)

    # pool.map returns results in input order, so they line up with obj_dirs.
    for result_dir, mr in zip(obj_dirs, mapped_results):
        add_results(dir_results[result_dir], mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs.pop()]

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for completed_dir in ordered_dirs:
        dir_result = dir_results[completed_dir]
        add_results(context.rcumulative, dir_result)
        print_result_row(completed_dir, dir_result)
|
||
|
|
||
|
def process_pending_files(pool, chunk_size, objdump, context):
    """Dispatch full chunks until fewer than `chunk_size` files remain queued.

    Whatever is left over stays in `context` for a later call.
    """
    while True:
        if context.npending < chunk_size:
            break
        process_one_chunk(pool, chunk_size, objdump, context)
|
||
|
|
||
|
def go():
    """Walk the requested tree, demangle every object file, and print a report.

    Reads `dir`, `extensions` and `objdump` from the module-level `args`
    namespace.  Each directory under `args.dir` gets a status row as soon as
    all of its object files have been processed.  A list of failures and
    crashes and an overall summary are printed at the end.

    Errors raised while processing are reported with a traceback rather than
    propagated.  The worker pool is always closed and joined before returning.
    """
    obj_dir = args.dir

    # Normalise the comma separated list to '.ext' form.  Blank entries (for
    # example from a trailing comma) are skipped, because indexing into an
    # empty string would raise IndexError.
    extensions = []
    for ext in args.extensions.split(','):
        ext = ext.strip()
        if not ext:
            continue
        extensions.append(ext if ext.startswith('.') else '.' + ext)

    pool_size = 48
    pool = Pool(processes=pool_size)

    def ratio(part, whole):
        # With nothing processed there is no meaningful percentage; report 0
        # instead of dividing by zero.
        return float(part) / float(whole) if whole else 0.0

    try:
        nfiles = 0
        context = MapContext()

        for root, _dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                ext = os.path.splitext(f)[1]
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.normpath(os.path.join(root, f))
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir
            if not pending:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, args.objdump, context)

        # Whatever is left is less than one full chunk; flush it.
        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, args.objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        # NOTE(review): `errors` is a set of distinct mangled names, while
        # `nsymbols` counts every parsed line, so a name that fails in several
        # files is subtracted only once here -- confirm this is intended.
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print(" " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print(" " + f)
        print("Summary:")
        spct = ratio(nsuccess, total.nsymbols)
        fpct = ratio(nfailed, total.nsymbols)
        cpct = ratio(ncrashed, nfiles)
        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))

    except Exception:
        # Best effort: report what went wrong, then fall through to shut the
        # pool down cleanly.
        traceback.print_exc()
    finally:
        pool.close()
        pool.join()
|
||
|
|
||
|
if __name__ == "__main__":
    # Has to be the first thing after the __main__ guard so that worker
    # processes spawned from a frozen Windows executable are set up before any
    # argument parsing happens.
    multiprocessing.freeze_support()

    # Default object file extension for the host platform.
    def_obj = 'obj' if sys.platform == 'win32' else 'o'

    parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
    parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
    # A bare program name is resolved through PATH when the subprocess is
    # launched, which gives the `which llvm-objdump` behaviour the help text
    # promises.  Without a default, None would be handed to Popen.
    parser.add_argument('--objdump', type=str, default='llvm-objdump',
                        help='path to llvm-objdump. If not specified ' +
                             'the tool is located as if by `which llvm-objdump`.')
    parser.add_argument('--extensions', type=str, default=def_obj,
                        help='comma separated list of extensions to demangle (e.g. `o,obj`). ' +
                             'By default this will be `obj` on Windows and `o` otherwise.')

    # go() reads its configuration from this module-level name.
    args = parser.parse_args()

    go()
|
||
|
|