From 55b3c538d55ef53b9871e6ba41376a2ceecc302a Mon Sep 17 00:00:00 2001 From: Christopher Haster Date: Mon, 7 Mar 2022 00:24:30 -0600 Subject: [PATCH] Added ./scripts/summary.py A full summary of static measurements (code size, stack usage, etc) can now be found with: make summary This is done through the combination of a new ./scripts/summary.py script and the ability of existing scripts to merge into existing csv files, allowing multiple results to be merged either in a pipeline, or in parallel with a single ./scripts/summary.py call. The ./scripts/summary.py script can also be used to quickly compare different builds or configurations. This is a proper implementation of a similar but hacky shell script that has already been very useful for making optimization decisions: $ ./scripts/structs.py new.csv -d old.csv --summary name (2 added, 0 removed) code stack structs TOTAL 28648 (-2.7%) 2448 1012 Also some other small tweaks to scripts: - Removed state saving diff rules. This isn't the most useful way to handle comparing changes. - Added short flags for --summary (-Y) and --files (-F), since these are quite often used. 
--- Makefile | 34 +++--- scripts/code.py | 73 ++++++++--- scripts/coverage.py | 69 ++++++++--- scripts/data.py | 72 ++++++++--- scripts/stack.py | 77 +++++++++--- scripts/structs.py | 75 +++++++++--- scripts/summary.py | 290 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 580 insertions(+), 110 deletions(-) create mode 100755 scripts/summary.py diff --git a/Makefile b/Makefile index 46773b7..bcd6f0e 100644 --- a/Makefile +++ b/Makefile @@ -56,6 +56,9 @@ endif ifdef EXEC override TESTFLAGS += --exec="$(EXEC)" endif +ifdef COVERAGE +override TESTFLAGS += --coverage +endif ifdef BUILDDIR override TESTFLAGS += --build-dir="$(BUILDDIR:/=)" override CALLSFLAGS += --build-dir="$(BUILDDIR:/=)" @@ -104,41 +107,34 @@ test%: tests/test$$(firstword $$(subst \#, ,%)).toml code: $(OBJ) ./scripts/code.py $^ -S $(CODEFLAGS) -.PHONY: code-diff -code-diff: $(OBJ) - ./scripts/code.py $^ -d $(TARGET).code.csv -o $(TARGET).code.csv $(CODEFLAGS) - .PHONY: data data: $(OBJ) ./scripts/data.py $^ -S $(DATAFLAGS) -.PHONY: data-diff -data-diff: $(OBJ) - ./scripts/data.py $^ -d $(TARGET).data.csv -o $(TARGET).data.csv $(DATAFLAGS) - .PHONY: stack stack: $(CGI) ./scripts/stack.py $^ -S $(STACKFLAGS) -.PHONY: stack-diff -stack-diff: $(CGI) - ./scripts/stack.py $^ -d $(TARGET).stack.csv -o $(TARGET).stack.csv $(STACKFLAGS) - .PHONY: structs structs: $(OBJ) ./scripts/structs.py $^ -S $(STRUCTSFLAGS) -.PHONY: structs-diff -structs-diff: $(OBJ) - ./scripts/structs.py $^ -d $(TARGET).structs.csv -o $(TARGET).structs.csv $(STRUCTSFLAGS) - .PHONY: coverage coverage: ./scripts/coverage.py $(BUILDDIR)tests/*.toml.info -s $(COVERAGEFLAGS) -.PHONY: coverage-diff -coverage-diff: - ./scripts/coverage.py $(BUILDDIR)tests/*.toml.info $(COVERAGEFLAGS) +.PHONY: summary +summary: $(OBJ) $(CGI) + $(strip \ + ./scripts/code.py $(OBJ) -q -o - $(CODEFLAGS) \ + | ./scripts/data.py $(OBJ) -q -m - -o - $(DATAFLAGS) \ + | ./scripts/stack.py $(CGI) -q -m - -o - $(STACKFLAGS) \ + | ./scripts/structs.py 
$(OBJ) -q -m - -o - $(STRUCTFLAGS) \ + $(if $(COVERAGE),\ + | ./scripts/coverage.py $(BUILDDIR)tests/*.toml.info \ + -q -m - -o - $(COVERAGEFLAGS)) \ + | ./scripts/summary.py $(SUMMARYFLAGS)) + # rules -include $(DEP) diff --git a/scripts/code.py b/scripts/code.py index 17be08a..73589c1 100755 --- a/scripts/code.py +++ b/scripts/code.py @@ -48,17 +48,30 @@ def collect(paths, **args): # map to source files if args.get('build_dir'): file = re.sub('%s/*' % re.escape(args['build_dir']), '', file) + # replace .o with .c, different scripts report .o/.c, we need to + # choose one if we want to deduplicate csv files + file = re.sub('\.o$', '.c', file) # discard internal functions if not args.get('everything'): if func.startswith('__'): continue # discard .8449 suffixes created by optimizer func = re.sub('\.[0-9]+', '', func) + flat_results.append((file, func, size)) return flat_results def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find sizes if not args.get('use', None): # find .o files @@ -76,13 +89,14 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['function'], + result['name'], int(result['code_size'])) - for result in r] + for result in r + if result.get('code_size') not in {None, ''}] total = 0 for _, _, size in results: @@ -91,13 +105,14 @@ def main(**args): # find previous results? 
if args.get('diff'): try: - with open(args['diff']) as f: + with openio(args['diff']) as f: r = csv.DictReader(f) prev_results = [ ( result['file'], - result['function'], + result['name'], int(result['code_size'])) - for result in r] + for result in r + if result.get('code_size') not in {None, ''}] except FileNotFoundError: prev_results = [] @@ -107,14 +122,34 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'function', 'code_size']) - for file, func, size in sorted(results): - w.writerow((file, func, size)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('code_size', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, size in results: + merged_results[(file, func)]['code_size'] = size + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'code_size']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) # print results - def dedup_entries(results, by='function'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: 0) for file, func, size in results: entry = (file if by == 'file' else func) @@ -162,7 +197,7 @@ def main(**args): diff, ' (%+.1f%%)' % (100*ratio) if ratio else '')) - def print_entries(by='function'): + def print_entries(by='name'): entries = dedup_entries(results, by=by) if not args.get('diff'): @@ -201,7 +236,7 @@ def main(**args): print_entries(by='file') print_totals() else: - print_entries(by='function') + print_entries(by='name') print_totals() if __name__ == "__main__": @@ -214,12 +249,16 @@ if __name__ == 
"__main__": or a list of paths. Defaults to %r." % OBJ_PATHS) parser.add_argument('-v', '--verbose', action='store_true', help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") parser.add_argument('-o', '--output', help="Specify CSV file to store results.") parser.add_argument('-u', '--use', help="Don't compile and find code sizes, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff code size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") parser.add_argument('-A', '--everything', action='store_true', @@ -228,13 +267,11 @@ if __name__ == "__main__": help="Sort by size.") parser.add_argument('-S', '--reverse-size-sort', action='store_true', help="Sort by size, but backwards.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-F', '--files', action='store_true', help="Show file-level code sizes. Note this does not include padding! " "So sizes may differ from other tools.") - parser.add_argument('--summary', action='store_true', + parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total code size.") - parser.add_argument('-q', '--quiet', action='store_true', - help="Don't show anything, useful with -o.") parser.add_argument('--type', default='tTrRdD', help="Type of symbols to report, this uses the same single-character " "type-names emitted by nm. 
Defaults to %(default)r.") diff --git a/scripts/coverage.py b/scripts/coverage.py index 0790b8a..b3a90ed 100755 --- a/scripts/coverage.py +++ b/scripts/coverage.py @@ -66,6 +66,15 @@ def collect(paths, **args): def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find coverage if not args.get('use'): # find *.info files @@ -83,14 +92,16 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['function'], + result['name'], int(result['coverage_hits']), int(result['coverage_count'])) - for result in r] + for result in r + if result.get('coverage_hits') not in {None, ''} + if result.get('coverage_count') not in {None, ''}] total_hits, total_count = 0, 0 for _, _, hits, count in results: @@ -100,14 +111,16 @@ def main(**args): # find previous results? if args.get('diff'): try: - with open(args['diff']) as f: + with openio(args['diff']) as f: r = csv.DictReader(f) prev_results = [ ( result['file'], - result['function'], + result['name'], int(result['coverage_hits']), int(result['coverage_count'])) - for result in r] + for result in r + if result.get('coverage_hits') not in {None, ''} + if result.get('coverage_count') not in {None, ''}] except FileNotFoundError: prev_results = [] @@ -118,14 +131,36 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'function', 'coverage_hits', 'coverage_count']) - for file, func, hits, count in sorted(results): - w.writerow((file, func, hits, count)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? 
+ if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('coverage_hits', None) + result.pop('coverage_count', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, hits, count in results: + merged_results[(file, func)]['coverage_hits'] = hits + merged_results[(file, func)]['coverage_count'] = count + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'coverage_hits', 'coverage_count']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) # print results - def dedup_entries(results, by='function'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: (0, 0)) for file, func, hits, count in results: entry = (file if by == 'file' else func) @@ -197,7 +232,7 @@ def main(**args): '%+d/%+d' % (diff_hits, diff_count), ' (%+.1f%%)' % (100*ratio) if ratio else '')) - def print_entries(by='function'): + def print_entries(by='name'): entries = dedup_entries(results, by=by) if not args.get('diff'): @@ -245,7 +280,7 @@ def main(**args): print_entries(by='file') print_totals() else: - print_entries(by='function') + print_entries(by='name') print_totals() if __name__ == "__main__": @@ -266,6 +301,8 @@ if __name__ == "__main__": help="Don't do any work, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff code size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") parser.add_argument('-A', '--everything', action='store_true', @@ -274,9 +311,9 @@ if __name__ == "__main__": help="Sort by coverage.") 
parser.add_argument('-S', '--reverse-coverage-sort', action='store_true', help="Sort by coverage, but backwards.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-F', '--files', action='store_true', help="Show file-level coverage.") - parser.add_argument('--summary', action='store_true', + parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total coverage.") parser.add_argument('-q', '--quiet', action='store_true', help="Don't show anything, useful with -o.") diff --git a/scripts/data.py b/scripts/data.py index 5ef049e..ba87fac 100755 --- a/scripts/data.py +++ b/scripts/data.py @@ -48,6 +48,9 @@ def collect(paths, **args): # map to source files if args.get('build_dir'): file = re.sub('%s/*' % re.escape(args['build_dir']), '', file) + # replace .o with .c, different scripts report .o/.c, we need to + # choose one if we want to deduplicate csv files + file = re.sub('\.o$', '.c', file) # discard internal functions if not args.get('everything'): if func.startswith('__'): @@ -59,6 +62,15 @@ def collect(paths, **args): return flat_results def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find sizes if not args.get('use', None): # find .o files @@ -76,13 +88,14 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['function'], + result['name'], int(result['data_size'])) - for result in r] + for result in r + if result.get('data_size') not in {None, ''}] total = 0 for _, _, size in results: @@ -91,13 +104,14 @@ def main(**args): # find previous results? 
if args.get('diff'): try: - with open(args['diff']) as f: + with openio(args['diff']) as f: r = csv.DictReader(f) prev_results = [ ( result['file'], - result['function'], + result['name'], int(result['data_size'])) - for result in r] + for result in r + if result.get('data_size') not in {None, ''}] except FileNotFoundError: prev_results = [] @@ -107,14 +121,34 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'function', 'data_size']) - for file, func, size in sorted(results): - w.writerow((file, func, size)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('data_size', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, size in results: + merged_results[(file, func)]['data_size'] = size + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'data_size']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) # print results - def dedup_entries(results, by='function'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: 0) for file, func, size in results: entry = (file if by == 'file' else func) @@ -162,7 +196,7 @@ def main(**args): diff, ' (%+.1f%%)' % (100*ratio) if ratio else '')) - def print_entries(by='function'): + def print_entries(by='name'): entries = dedup_entries(results, by=by) if not args.get('diff'): @@ -201,7 +235,7 @@ def main(**args): print_entries(by='file') print_totals() else: - print_entries(by='function') + print_entries(by='name') print_totals() if __name__ == "__main__": @@ -214,12 +248,16 @@ if __name__ == 
"__main__": or a list of paths. Defaults to %r." % OBJ_PATHS) parser.add_argument('-v', '--verbose', action='store_true', help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") parser.add_argument('-o', '--output', help="Specify CSV file to store results.") parser.add_argument('-u', '--use', help="Don't compile and find data sizes, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff data size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") parser.add_argument('-A', '--everything', action='store_true', @@ -228,13 +266,11 @@ if __name__ == "__main__": help="Sort by size.") parser.add_argument('-S', '--reverse-size-sort', action='store_true', help="Sort by size, but backwards.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-F', '--files', action='store_true', help="Show file-level data sizes. Note this does not include padding! " "So sizes may differ from other tools.") - parser.add_argument('--summary', action='store_true', + parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total data size.") - parser.add_argument('-q', '--quiet', action='store_true', - help="Don't show anything, useful with -o.") parser.add_argument('--type', default='dDbB', help="Type of symbols to report, this uses the same single-character " "type-names emitted by nm. 
Defaults to %(default)r.") diff --git a/scripts/stack.py b/scripts/stack.py index cfa7ddb..0c652d8 100755 --- a/scripts/stack.py +++ b/scripts/stack.py @@ -116,6 +116,15 @@ def collect(paths, **args): return flat_results def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find sizes if not args.get('use', None): # find .ci files @@ -133,15 +142,17 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['function'], + result['name'], int(result['stack_frame']), float(result['stack_limit']), # note limit can be inf set()) - for result in r] + for result in r + if result.get('stack_frame') not in {None, ''} + if result.get('stack_limit') not in {None, ''}] total_frame = 0 total_limit = 0 @@ -152,15 +163,17 @@ def main(**args): # find previous results? if args.get('diff'): try: - with open(args['diff']) as f: + with openio(args['diff']) as f: r = csv.DictReader(f) prev_results = [ ( result['file'], - result['function'], + result['name'], int(result['stack_frame']), float(result['stack_limit']), set()) - for result in r] + for result in r + if result.get('stack_frame') not in {None, ''} + if result.get('stack_limit') not in {None, ''}] except FileNotFoundError: prev_results = [] @@ -172,14 +185,36 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'function', 'stack_frame', 'stack_limit']) - for file, func, frame, limit, _ in sorted(results): - w.writerow((file, func, frame, limit)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? 
+ if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('stack_frame', None) + result.pop('stack_limit', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, frame, limit, _ in results: + merged_results[(file, func)]['stack_frame'] = frame + merged_results[(file, func)]['stack_limit'] = limit + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'stack_frame', 'stack_limit']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) # print results - def dedup_entries(results, by='function'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: (0, 0, set())) for file, func, frame, limit, deps in results: entry = (file if by == 'file' else func) @@ -272,7 +307,7 @@ def main(**args): else ' (-∞%)' if ratio < 0 and m.isinf(ratio) else ' (%+.1f%%)' % (100*ratio))) - def print_entries(by='function'): + def print_entries(by='name'): # build optional tree of dependencies def print_deps(entries, depth, print, filter=lambda _: True, @@ -346,7 +381,7 @@ def main(**args): print_entries(by='file') print_totals() else: - print_entries(by='function') + print_entries(by='name') print_totals() @@ -360,12 +395,16 @@ if __name__ == "__main__": or a list of paths. Defaults to %r." 
% CI_PATHS) parser.add_argument('-v', '--verbose', action='store_true', help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") parser.add_argument('-o', '--output', help="Specify CSV file to store results.") parser.add_argument('-u', '--use', help="Don't parse callgraph files, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") parser.add_argument('-A', '--everything', action='store_true', @@ -374,19 +413,17 @@ if __name__ == "__main__": help="Sort by stack limit.") parser.add_argument('-S', '--reverse-limit-sort', action='store_true', help="Sort by stack limit, but backwards.") - parser.add_argument('-f', '--frame-sort', action='store_true', + parser.add_argument('--frame-sort', action='store_true', help="Sort by stack frame size.") - parser.add_argument('-F', '--reverse-frame-sort', action='store_true', + parser.add_argument('--reverse-frame-sort', action='store_true', help="Sort by stack frame size, but backwards.") parser.add_argument('-L', '--depth', default=0, type=lambda x: int(x, 0), nargs='?', const=float('inf'), help="Depth of dependencies to show.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-F', '--files', action='store_true', help="Show file-level calls.") - parser.add_argument('--summary', action='store_true', + parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total stack size.") - parser.add_argument('-q', '--quiet', action='store_true', - help="Don't show anything, useful with -o.") parser.add_argument('--build-dir', help="Specify the relative build directory. 
Used to map object files \ to the correct source files.") diff --git a/scripts/structs.py b/scripts/structs.py index d608fc9..2ec166b 100755 --- a/scripts/structs.py +++ b/scripts/structs.py @@ -62,11 +62,24 @@ def collect(paths, **args): # map to source files if args.get('build_dir'): file = re.sub('%s/*' % re.escape(args['build_dir']), '', file) + # replace .o with .c, different scripts report .o/.c, we need to + # choose one if we want to deduplicate csv files + file = re.sub('\.o$', '.c', file) + flat_results.append((file, struct, size)) return flat_results def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find sizes if not args.get('use', None): # find .o files @@ -84,13 +97,14 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['struct'], + result['name'], int(result['struct_size'])) - for result in r] + for result in r + if result.get('struct_size') not in {None, ''}] total = 0 for _, _, size in results: @@ -99,13 +113,14 @@ def main(**args): # find previous results? 
if args.get('diff'): try: - with open(args['diff']) as f: + with openio(args['diff']) as f: r = csv.DictReader(f) prev_results = [ ( result['file'], - result['struct'], + result['name'], int(result['struct_size'])) - for result in r] + for result in r + if result.get('struct_size') not in {None, ''}] except FileNotFoundError: prev_results = [] @@ -115,14 +130,34 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'struct', 'struct_size']) - for file, struct, size in sorted(results): - w.writerow((file, struct, size)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + struct = result.pop('name', '') + result.pop('struct_size', None) + merged_results[(file, struct)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, struct, size in results: + merged_results[(file, struct)]['struct_size'] = size + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'struct_size']) + w.writeheader() + for (file, struct), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': struct, **result}) # print results - def dedup_entries(results, by='struct'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: 0) for file, struct, size in results: entry = (file if by == 'file' else struct) @@ -170,7 +205,7 @@ def main(**args): diff, ' (%+.1f%%)' % (100*ratio) if ratio else '')) - def print_entries(by='struct'): + def print_entries(by='name'): entries = dedup_entries(results, by=by) if not args.get('diff'): @@ -209,25 +244,29 @@ def main(**args): print_entries(by='file') print_totals() else: - print_entries(by='struct') + print_entries(by='name') print_totals() if __name__ == "__main__": import argparse 
import sys parser = argparse.ArgumentParser( - description="Find code size at the function level.") + description="Find struct sizes.") parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS, help="Description of where to find *.o files. May be a directory \ or a list of paths. Defaults to %r." % OBJ_PATHS) parser.add_argument('-v', '--verbose', action='store_true', help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") parser.add_argument('-o', '--output', help="Specify CSV file to store results.") parser.add_argument('-u', '--use', help="Don't compile and find struct sizes, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff struct size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") parser.add_argument('-A', '--everything', action='store_true', @@ -236,12 +275,10 @@ if __name__ == "__main__": help="Sort by size.") parser.add_argument('-S', '--reverse-size-sort', action='store_true', help="Sort by size, but backwards.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-F', '--files', action='store_true', help="Show file-level struct sizes.") - parser.add_argument('--summary', action='store_true', + parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total struct size.") - parser.add_argument('-q', '--quiet', action='store_true', - help="Don't show anything, useful with -o.") parser.add_argument('--objdump-tool', default=['objdump'], type=lambda x: x.split(), help="Path to the objdump tool to use.") parser.add_argument('--build-dir', diff --git a/scripts/summary.py b/scripts/summary.py new file mode 100755 index 0000000..d9c9252 --- /dev/null +++ b/scripts/summary.py @@ -0,0 
+1,290 @@ +#!/usr/bin/env python3 +# +# Script to summarize the outputs of other scripts. Operates on CSV files. +# + +import functools as ft +import collections as co +import os +import csv +import re +import math as m + +# displayable fields +Field = co.namedtuple('Field', 'name,parse,acc,key,fmt,repr,null,ratio') +FIELDS = [ + # name, parse, accumulate, fmt, print, null + Field('code', + lambda r: int(r['code_size']), + sum, + lambda r: r, + '%7s', + lambda r: r, + '-', + lambda old, new: (new-old)/old), + Field('data', + lambda r: int(r['data_size']), + sum, + lambda r: r, + '%7s', + lambda r: r, + '-', + lambda old, new: (new-old)/old), + Field('stack', + lambda r: float(r['stack_limit']), + max, + lambda r: r, + '%7s', + lambda r: '∞' if m.isinf(r) else int(r), + '-', + lambda old, new: (new-old)/old), + Field('structs', + lambda r: int(r['struct_size']), + sum, + lambda r: r, + '%8s', + lambda r: r, + '-', + lambda old, new: (new-old)/old), + Field('coverage', + lambda r: (int(r['coverage_hits']), int(r['coverage_count'])), + lambda rs: ft.reduce(lambda a, b: (a[0]+b[0], a[1]+b[1]), rs), + lambda r: r[0]/r[1], + '%19s', + lambda r: '%11s %7s' % ('%d/%d' % (r[0], r[1]), '%.1f%%' % (100*r[0]/r[1])), + '%11s %7s' % ('-', '-'), + lambda old, new: ((new[0]/new[1]) - (old[0]/old[1]))) +] + + +def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + + # find results + results = co.defaultdict(lambda: {}) + for path in args.get('csv_paths', '-'): + try: + with openio(path) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + name = result.pop('name', '') + prev = results[(file, name)] + for field in FIELDS: + try: + r = field.parse(result) + if field.name in prev: + results[(file, name)][field.name] = field.acc( + [prev[field.name], r]) + else: + results[(file, 
name)][field.name] = r + except (KeyError, ValueError): + pass + except FileNotFoundError: + pass + + # find fields + if args.get('all_fields'): + fields = FIELDS + elif args.get('fields') is not None: + fields_dict = {field.name: field for field in FIELDS} + fields = [fields_dict[f] for f in args['fields']] + else: + fields = [] + for field in FIELDS: + if any(field.name in result for result in results.values()): + fields.append(field) + + # find total for every field + total = {} + for result in results.values(): + for field in fields: + if field.name in result and field.name in total: + total[field.name] = field.acc( + [total[field.name], result[field.name]]) + elif field.name in result: + total[field.name] = result[field.name] + + # find previous results? + if args.get('diff'): + prev_results = co.defaultdict(lambda: {}) + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + name = result.pop('name', '') + prev = prev_results[(file, name)] + for field in FIELDS: + try: + r = field.parse(result) + if field.name in prev: + prev_results[(file, name)][field.name] = field.acc( + [prev[field.name], r]) + else: + prev_results[(file, name)][field.name] = r + except (KeyError, ValueError): + pass + except FileNotFoundError: + pass + + if args.get('all_fields'): + fields = FIELDS + elif args.get('fields') is not None: + fields_dict = {field.name: field for field in FIELDS} + fields = [fields_dict[f] for f in args['fields']] + else: + fields = [] + for field in FIELDS: + if any(field.name in result for result in prev_results.values()): + fields.append(field) + + prev_total = {} + for result in prev_results.values(): + for field in fields: + if field.name in result and field.name in prev_total: + prev_total[field.name] = field.acc( + [prev_total[field.name], result[field.name]]) + elif field.name in result: + prev_total[field.name] = result[field.name] + + # print results + def dedup_entries(results, 
by='name'): + entries = co.defaultdict(lambda: {}) + for (file, func), result in results.items(): + entry = (file if by == 'file' else func) + prev = entries[entry] + for field in fields: + if field.name in result and field.name in prev: + entries[entry][field.name] = field.acc( + [prev[field.name], result[field.name]]) + elif field.name in result: + entries[entry][field.name] = result[field.name] + return entries + + def sorted_entries(entries): + if args.get('sort') is not None: + field = {field.name: field for field in FIELDS}[args['sort']] + return sorted(entries, key=lambda x: ( + -(field.key(x[1][field.name])) if field.name in x[1] else -1, x)) + elif args.get('reverse_sort') is not None: + field = {field.name: field for field in FIELDS}[args['reverse_sort']] + return sorted(entries, key=lambda x: ( + +(field.key(x[1][field.name])) if field.name in x[1] else -1, x)) + else: + return sorted(entries) + + def print_header(by=''): + if not args.get('diff'): + print('%-36s' % by, end='') + for field in fields: + print((' '+field.fmt) % field.name, end='') + print() + else: + print('%-36s' % by, end='') + for field in fields: + print((' '+field.fmt) % field.name, end='') + print(' %-9s' % '', end='') + print() + + def print_entry(name, result): + print('%-36s' % name, end='') + for field in fields: + r = result.get(field.name) + if r is not None: + print((' '+field.fmt) % field.repr(r), end='') + else: + print((' '+field.fmt) % '-', end='') + print() + + def print_diff_entry(name, old, new): + print('%-36s' % name, end='') + for field in fields: + n = new.get(field.name) + if n is not None: + print((' '+field.fmt) % field.repr(n), end='') + else: + print((' '+field.fmt) % '-', end='') + o = old.get(field.name) + ratio = ( + 0.0 if m.isinf(o or 0) and m.isinf(n or 0) + else +float('inf') if m.isinf(n or 0) + else -float('inf') if m.isinf(o or 0) + else 0.0 if not o and not n + else +1.0 if not o + else -1.0 if not n + else field.ratio(o, n)) + print(' %-9s' % ( + '' 
if not ratio + else '(+∞%)' if ratio > 0 and m.isinf(ratio) + else '(-∞%)' if ratio < 0 and m.isinf(ratio) + else '(%+.1f%%)' % (100*ratio)), end='') + print() + + def print_entries(by='name'): + entries = dedup_entries(results, by=by) + + if not args.get('diff'): + print_header(by=by) + for name, result in sorted_entries(entries.items()): + print_entry(name, result) + else: + prev_entries = dedup_entries(prev_results, by=by) + print_header(by='%s (%d added, %d removed)' % (by, + sum(1 for name in entries if name not in prev_entries), + sum(1 for name in prev_entries if name not in entries))) + for name, result in sorted_entries(entries.items()): + if args.get('all') or result != prev_entries.get(name, {}): + print_diff_entry(name, prev_entries.get(name, {}), result) + + def print_totals(): + if not args.get('diff'): + print_entry('TOTAL', total) + else: + print_diff_entry('TOTAL', prev_total, total) + + if args.get('summary'): + print_header() + print_totals() + elif args.get('files'): + print_entries(by='file') + print_totals() + else: + print_entries(by='name') + print_totals() + + +if __name__ == "__main__": + import argparse + import sys + parser = argparse.ArgumentParser( + description="Summarize measurements") + parser.add_argument('csv_paths', nargs='*', default='-', + help="Description of where to find *.csv files. May be a directory \ + or list of paths. 
*.csv files will be merged to show the total \ + coverage.") + parser.add_argument('-d', '--diff', + help="Specify CSV file to diff against.") + parser.add_argument('-a', '--all', action='store_true', + help="Show all objects, not just the ones that changed.") + parser.add_argument('-e', '--all-fields', action='store_true', + help="Show all fields, even those with no results.") + parser.add_argument('-f', '--fields', type=lambda x: re.split('\s*,\s*', x), + help="Comma separated list of fields to print, by default all fields \ + that are found in the CSV files are printed.") + parser.add_argument('-s', '--sort', + help="Sort by this field.") + parser.add_argument('-S', '--reverse-sort', + help="Sort by this field, but backwards.") + parser.add_argument('-F', '--files', action='store_true', + help="Show file-level calls.") + parser.add_argument('-Y', '--summary', action='store_true', + help="Only show the totals.") + sys.exit(main(**vars(parser.parse_args())))