# llvm-for-llvmta/utils/demangle_tree.py

# Given a path to llvm-objdump and a directory tree, spider the directory tree
# dumping every object file encountered with correct options needed to demangle
# symbols in the object file, and collect statistics about failed / crashed
# demanglings.  Useful for stress testing the demangler against a large corpus
# of inputs.

from __future__ import print_function

import argparse
import functools
import os
import re
import sys
import subprocess
import traceback
from multiprocessing import Pool
import multiprocessing

args = None

def parse_line(line):
    """Extract a (mangled, demangled) pair from one line of objdump output.

    The mangled name is taken to start at the first '?' on the line and to
    run up to the first '(' that follows it; the demangled text is whatever
    lies between that '(' and the last ')' on the line.

    Returns a tuple `(mangled, demangled)` of whitespace-stripped strings, or
    `(None, None)` if the line does not contain a '?', a following '(' and a
    closing ')'.
    """
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None

    # Use the *last* ')' so that parentheses inside the demangled text (e.g.
    # a parameter list) are kept.
    close_paren = line.rfind(')', open_paren)
    if close_paren == -1:
        # No closing parenthesis: without this check the slice below would
        # silently use -1 as its end index and drop the last character.
        return None, None

    mangled = line[question:open_paren]
    demangled = line[open_paren + 1:close_paren]
    return mangled.strip(), demangled.strip()

class Result(object):
    """Demangling statistics for one object file or an aggregate of several."""

    def __init__(self):
        # Path this result refers to (an object file, or a directory when the
        # result is used as a per-directory accumulator).
        self.file = None
        # Number of files and symbols accounted for by this result.
        self.nfiles = 0
        self.nsymbols = 0
        # Mangled names that could not be demangled.
        self.errors = set()
        # Paths of files for which the objdump invocation failed.
        self.crashed = []

class MapContext(object):
    """Mutable bookkeeping shared across successive chunks of work."""

    def __init__(self):
        # Total number of object files queued in `pending_objs`.
        self.npending = 0
        # Queue of (directory, [object file paths]) tuples still to process.
        self.pending_objs = []
        # Running total over every directory that has been fully processed.
        self.rcumulative = Result()
        # Partial result of a directory whose files were split across chunks,
        # or None when no directory is currently half-processed.
        self.rincomplete = None

def process_file(path, objdump):
    """Dump the symbol table of one object file and tally demangling failures.

    Runs `objdump -t -demangle path` and scans its standard output line by
    line with `parse_line`.

    Args:
        path: path of the object file to inspect.
        objdump: path (or name) of the llvm-objdump executable to run.

    Returns:
        A `Result` whose `file` is `path`.  If the tool exits with a non-zero
        status, `crashed` is `[path]` and nothing else is filled in.
        Otherwise `nsymbols` counts the lines that parsed as symbols and
        `errors` holds the mangled names whose demangled text contains
        "invalid mangled name".
    """
    r = Result()
    r.file = path

    popen_args = [objdump, '-t', '-demangle', path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured only to keep it off the console; it is not inspected.
    stdout, _ = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    # Symbol tables may contain bytes that are not valid UTF-8.  Replace them
    # rather than raising, so that one odd symbol does not abort the whole run.
    output = stdout.decode('utf-8', 'replace')

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r

def add_results(r1, r2):
    """Fold the statistics of `r2` into `r1` in place; `r2` is left untouched."""
    # Scalar counters.
    r1.nfiles = r1.nfiles + r2.nfiles
    r1.nsymbols = r1.nsymbols + r2.nsymbols
    # Collections: union of failed names, concatenation of crashed files.
    r1.errors.update(r2.errors)
    r1.crashed.extend(r2.crashed)

def print_result_row(directory, result):
    """Print a one-line status summary of `result` labelled with `directory`."""
    row_template = "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'"
    crash_count = len(result.crashed)
    error_count = len(result.errors)
    row = row_template.format(
        result.nfiles, crash_count, error_count, result.nsymbols, directory)
    print(row)

def process_one_chunk(pool, chunk_size, objdump, context):
    """Process up to `chunk_size` pending object files in parallel.

    Files are taken from the front of `context.pending_objs`, directory by
    directory, until `chunk_size` files have been gathered or nothing is
    pending.  They are then run through `process_file` via `pool.map`, and
    the per-file results are folded into one `Result` per directory.

    Every directory whose files have all been processed is added to
    `context.rcumulative` and gets a status row printed.  If the chunk ended
    in the middle of a directory, that directory's partial result is parked in
    `context.rincomplete` and picked up again by the next call.

    Args:
        pool: object providing `map(func, iterable)` (a multiprocessing pool).
        chunk_size: maximum number of files to process in this call.
        objdump: llvm-objdump executable, forwarded to `process_file`.
        context: the `MapContext` holding the pending queue and totals.
    """
    objs = []
    # Directory owning each entry of `objs`, kept in lock-step with it.  The
    # owner is recorded explicitly instead of being recomputed from the file
    # path: for files directly under '.', the normalised path has an empty
    # dirname, which would not match the directory key '.'.
    obj_dirs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir, objs_this_dir = context.pending_objs[0]
        ordered_dirs.append(this_dir)

        # Resume the partially filled result left over from the previous
        # chunk, if there is one; otherwise start a fresh one.
        dir_result = context.rincomplete
        context.rincomplete = None
        if dir_result is None:
            dir_result = Result()

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[:ntaken])
        obj_dirs.extend([this_dir] * ntaken)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            # Leave the untaken files at the front of the queue.
            context.pending_objs[0] = (this_dir, objs_this_dir[ntaken:])
            incomplete = True

        dir_result.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    worker = functools.partial(process_file, objdump=objdump)
    # pool.map returns results in the same order as its inputs, so they can be
    # paired with `obj_dirs` positionally.
    mapped_results = pool.map(worker, objs)

    for owner_dir, mr in zip(obj_dirs, mapped_results):
        add_results(dir_results[owner_dir], mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs.pop()]

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for c in ordered_dirs:
        completed = dir_results[c]
        add_results(context.rcumulative, completed)
        print_result_row(c, completed)

def process_pending_files(pool, chunk_size, objdump, context):
    """Process full chunks until fewer than `chunk_size` files remain pending."""
    while True:
        if context.npending < chunk_size:
            # Not enough work left for a full chunk; leave it queued.
            break
        process_one_chunk(pool, chunk_size, objdump, context)

def go():
    """Walk the tree named by the global `args`, demangle, and print a report.

    Reads `args.dir`, `args.extensions` and `args.objdump`.  Every file under
    `args.dir` whose extension is in the requested list is queued and
    processed in chunks on a worker pool.  A status row is printed per
    directory, followed by the lists of failed symbols and crashed files and
    an overall summary.

    Exceptions raised while processing are printed as a traceback rather than
    propagated; the worker pool is always closed and joined before returning.
    """
    global args

    obj_dir = args.dir
    # Accept extensions with or without a leading dot.  Blank entries (for
    # example from a trailing comma) are dropped instead of raising IndexError.
    extensions = [x.strip() for x in args.extensions.split(',')]
    extensions = [x if x.startswith('.') else '.' + x for x in extensions if x]

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, _dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                ext = os.path.splitext(f)[1]
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.normpath(os.path.join(root, f))
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir
            if not pending:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, args.objdump, context)

        # Whatever is left is less than one full chunk; flush it.
        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, args.objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print("  " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print("  " + f)
        print("Summary:")
        # Guard the ratios: an empty tree, or one in which no symbols were
        # found, would otherwise raise ZeroDivisionError here.
        spct = float(nsuccess) / total.nsymbols if total.nsymbols else 0.0
        fpct = float(nfailed) / total.nsymbols if total.nsymbols else 0.0
        cpct = float(ncrashed) / nfiles if nfiles else 0.0
        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))

    except Exception:
        # Top-level boundary: report the failure but still shut the pool down.
        traceback.print_exc()
    finally:
        pool.close()
        pool.join()

if __name__ == "__main__":
    # freeze_support() has to run straight after the __main__ guard so that,
    # in a frozen Windows executable, child processes are intercepted before
    # they reach the argument parser.
    multiprocessing.freeze_support()

    def_obj = 'obj' if sys.platform == 'win32' else 'o'

    parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
    parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
    # Default to the bare tool name so that it is resolved through PATH, as
    # the help text promises; with no default the value would be None.
    parser.add_argument('--objdump', type=str, default='llvm-objdump',
                        help='path to llvm-objdump.  If not specified ' +
                        'the tool is located as if by `which llvm-objdump`.')
    parser.add_argument('--extensions', type=str, default=def_obj,
                        help='comma separated list of extensions to demangle (e.g. `o,obj`).  ' +
                        'By default this will be `obj` on Windows and `o` otherwise.')

    # `args` is the module-level global read by go().
    args = parser.parse_args()

    go()
first commit 2022-04-25 10:02:23 +02:00			`# Given a path to llvm-objdump and a directory tree, spider the directory tree`
			`# dumping every object file encountered with correct options needed to demangle`
			`# symbols in the object file, and collect statistics about failed / crashed`
			`# demanglings. Useful for stress testing the demangler against a large corpus`
			`# of inputs.`

			`from __future__ import print_function`

			`import argparse`
			`import functools`
			`import os`
			`import re`
			`import sys`
			`import subprocess`
			`import traceback`
			`from multiprocessing import Pool`
			`import multiprocessing`

			`args = None`

			`def parse_line(line):`
			`question = line.find('?')`
			`if question == -1:`
			`return None, None`

			`open_paren = line.find('(', question)`
			`if open_paren == -1:`
			`return None, None`
			`close_paren = line.rfind(')', open_paren)`
			`if open_paren == -1:`
			`return None, None`
			`mangled = line[question : open_paren]`
			`demangled = line[open_paren+1 : close_paren]`
			`return mangled.strip(), demangled.strip()`

			`class Result(object):`
			`def __init__(self):`
			`self.crashed = []`
			`self.file = None`
			`self.nsymbols = 0`
			`self.errors = set()`
			`self.nfiles = 0`

			`class MapContext(object):`
			`def __init__(self):`
			`self.rincomplete = None`
			`self.rcumulative = Result()`
			`self.pending_objs = []`
			`self.npending = 0`

			`def process_file(path, objdump):`
			`r = Result()`
			`r.file = path`

			`popen_args = [objdump, '-t', '-demangle', path]`
			`p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
			`stdout, stderr = p.communicate()`
			`if p.returncode != 0:`
			`r.crashed = [r.file]`
			`return r`

			`output = stdout.decode('utf-8')`

			`for line in output.splitlines():`
			`mangled, demangled = parse_line(line)`
			`if mangled is None:`
			`continue`
			`r.nsymbols += 1`
			`if "invalid mangled name" in demangled:`
			`r.errors.add(mangled)`
			`return r`

			`def add_results(r1, r2):`
			`r1.crashed.extend(r2.crashed)`
			`r1.errors.update(r2.errors)`
			`r1.nsymbols += r2.nsymbols`
			`r1.nfiles += r2.nfiles`

			`def print_result_row(directory, result):`
			`print("[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(`
			`result.nfiles, len(result.crashed), len(result.errors), result.nsymbols, directory))`

			`def process_one_chunk(pool, chunk_size, objdump, context):`
			`objs = []`

			`incomplete = False`
			`dir_results = {}`
			`ordered_dirs = []`
			`while context.npending > 0 and len(objs) < chunk_size:`
			`this_dir = context.pending_objs[0][0]`
			`ordered_dirs.append(this_dir)`
			`re = Result()`
			`if context.rincomplete is not None:`
			`re = context.rincomplete`
			`context.rincomplete = None`

			`dir_results[this_dir] = re`
			`re.file = this_dir`

			`nneeded = chunk_size - len(objs)`
			`objs_this_dir = context.pending_objs[0][1]`
			`navail = len(objs_this_dir)`
			`ntaken = min(nneeded, navail)`
			`objs.extend(objs_this_dir[0:ntaken])`
			`remaining_objs_this_dir = objs_this_dir[ntaken:]`
			`context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)`
			`context.npending -= ntaken`
			`if ntaken == navail:`
			`context.pending_objs.pop(0)`
			`else:`
			`incomplete = True`

			`re.nfiles += ntaken`

			`assert(len(objs) == chunk_size or context.npending == 0)`

			`copier = functools.partial(process_file, objdump=objdump)`
			`mapped_results = list(pool.map(copier, objs))`

			`for mr in mapped_results:`
			`result_dir = os.path.dirname(mr.file)`
			`result_entry = dir_results[result_dir]`
			`add_results(result_entry, mr)`

			`# It's only possible that a single item is incomplete, and it has to be the`
			`# last item.`
			`if incomplete:`
			`context.rincomplete = dir_results[ordered_dirs[-1]]`
			`ordered_dirs.pop()`

			`# Now ordered_dirs contains a list of all directories which did complete.`
			`for c in ordered_dirs:`
			`re = dir_results[c]`
			`add_results(context.rcumulative, re)`
			`print_result_row(c, re)`

			`def process_pending_files(pool, chunk_size, objdump, context):`
			`while context.npending >= chunk_size:`
			`process_one_chunk(pool, chunk_size, objdump, context)`

			`def go():`
			`global args`

			`obj_dir = args.dir`
			`extensions = args.extensions.split(',')`
			`extensions = [x if x[0] == '.' else '.' + x for x in extensions]`


			`pool_size = 48`
			`pool = Pool(processes=pool_size)`

			`try:`
			`nfiles = 0`
			`context = MapContext()`

			`for root, dirs, files in os.walk(obj_dir):`
			`root = os.path.normpath(root)`
			`pending = []`
			`for f in files:`
			`file, ext = os.path.splitext(f)`
			`if not ext in extensions:`
			`continue`

			`nfiles += 1`
			`full_path = os.path.join(root, f)`
			`full_path = os.path.normpath(full_path)`
			`pending.append(full_path)`

			`# If this directory had no object files, just print a default`
			`# status line and continue with the next dir`
			`if len(pending) == 0:`
			`print_result_row(root, Result())`
			`continue`

			`context.npending += len(pending)`
			`context.pending_objs.append((root, pending))`
			# Drain the tasks, `pool_size` at a time, until we have less than
			# `pool_size` tasks remaining.
			`process_pending_files(pool, pool_size, args.objdump, context)`

			`assert(context.npending < pool_size);`
			`process_one_chunk(pool, pool_size, args.objdump, context)`

			`total = context.rcumulative`
			`nfailed = len(total.errors)`
			`nsuccess = total.nsymbols - nfailed`
			`ncrashed = len(total.crashed)`

			`if (nfailed > 0):`
			`print("Failures:")`
			`for m in sorted(total.errors):`
			`print(" " + m)`
			`if (ncrashed > 0):`
			`print("Crashes:")`
			`for f in sorted(total.crashed):`
			`print(" " + f)`
			`print("Summary:")`
			`spct = float(nsuccess)/float(total.nsymbols)`
			`fpct = float(nfailed)/float(total.nsymbols)`
			`cpct = float(ncrashed)/float(nfiles)`
			`print("Processed {0} object files.".format(nfiles))`
			`print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))`
			`print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))`
			`print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))`

			`except:`
			`traceback.print_exc()`

			`pool.close()`
			`pool.join()`

			`if __name__ == "__main__":`
			`def_obj = 'obj' if sys.platform == 'win32' else 'o'`

			`parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')`
			`parser.add_argument('dir', type=str, help='the root directory at which to start crawling')`
			`parser.add_argument('--objdump', type=str, help='path to llvm-objdump. If not specified ' +`
			'the tool is located as if by `which llvm-objdump`.')
			`parser.add_argument('--extensions', type=str, default=def_obj,`
			help='comma separated list of extensions to demangle (e.g. `o,obj`). ' +
			'By default this will be `obj` on Windows and `o` otherwise.')

			`args = parser.parse_args()`


			`multiprocessing.freeze_support()`
			`go()`