From 0a2ff3b6ff666482a05929b0492ed615e30a1c14 Mon Sep 17 00:00:00 2001 From: Christopher Haster Date: Sun, 20 Feb 2022 12:42:44 -0600 Subject: [PATCH] Added scripts/structs.py for getting sizes of structs Note this does include internal structs, so this should probably be limited to informative purposes. --- Makefile | 26 +++-- scripts/structs.py | 241 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 260 insertions(+), 7 deletions(-) create mode 100755 scripts/structs.py diff --git a/Makefile b/Makefile index 596a697..088925c 100644 --- a/Makefile +++ b/Makefile @@ -17,12 +17,13 @@ TARGET ?= $(BUILDDIR)lfs.a endif -CC ?= gcc -AR ?= ar -SIZE ?= size -CTAGS ?= ctags -NM ?= nm -LCOV ?= lcov +CC ?= gcc +AR ?= ar +SIZE ?= size +CTAGS ?= ctags +NM ?= nm +OBJDUMP ?= objdump +LCOV ?= lcov SRC ?= $(wildcard *.c) OBJ := $(SRC:%.c=$(BUILDDIR)%.o) @@ -31,13 +32,14 @@ ASM := $(SRC:%.c=$(BUILDDIR)%.s) CGI := $(SRC:%.c=$(BUILDDIR)%.ci) ifdef DEBUG -override CFLAGS += -O0 -g3 +override CFLAGS += -O0 else override CFLAGS += -Os endif ifdef TRACE override CFLAGS += -DLFS_YES_TRACE endif +override CFLAGS += -g3 override CFLAGS += -I. 
override CFLAGS += -std=c99 -Wall -pedantic override CFLAGS += -Wextra -Wshadow -Wjump-misses-init -Wundef @@ -48,6 +50,7 @@ override CALLSFLAGS += -v override CODEFLAGS += -v override DATAFLAGS += -v override STACKFLAGS += -v +override STRUCTSFLAGS += -v override COVERAGEFLAGS += -v endif ifdef EXEC @@ -59,15 +62,20 @@ override CALLSFLAGS += --build-dir="$(BUILDDIR:/=)" override CODEFLAGS += --build-dir="$(BUILDDIR:/=)" override DATAFLAGS += --build-dir="$(BUILDDIR:/=)" override STACKFLAGS += --build-dir="$(BUILDDIR:/=)" +override STRUCTSFLAGS += --build-dir="$(BUILDDIR:/=)" override COVERAGEFLAGS += --build-dir="$(BUILDDIR:/=)" endif ifneq ($(NM),nm) override CODEFLAGS += --nm-tool="$(NM)" override DATAFLAGS += --nm-tool="$(NM)" endif +ifneq ($(OBJDUMP),objdump) +override STRUCTSFLAGS += --objdump-tool="$(OBJDUMP)" +endif override CODEFLAGS += -S override DATAFLAGS += -S override STACKFLAGS += -S +override STRUCTSFLAGS += -S override COVERAGEFLAGS += -s @@ -102,6 +110,10 @@ calls: $(CGI) stack: $(CGI) ./scripts/stack.py $^ $(STACKFLAGS) +.PHONY: structs +structs: $(OBJ) + ./scripts/structs.py $^ $(STRUCTSFLAGS) + .PHONY: test test: ./scripts/test.py $(TESTFLAGS) diff --git a/scripts/structs.py b/scripts/structs.py new file mode 100755 index 0000000..c5ac783 --- /dev/null +++ b/scripts/structs.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +# +# Script to find struct sizes. 
#

import os
import glob
import itertools as it
import subprocess as sp
import shlex
import re
import csv
import sys
import collections as co


OBJ_PATHS = ['*.o']

# Matches the three kinds of objdump --dwarf=info lines we care about:
# a DIE header (DW_TAG_*), a DW_AT_name attribute, or a DW_AT_byte_size
# attribute. Exactly one of the named groups is set on a match.
_DWARF_PATTERN = re.compile(
    r'^(?:.*DW_TAG_(?P<tag>[a-z_]+).*'
    r'|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
    r'|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')


def _parse_structs(lines):
    """Yield (name, size) for every named, sized struct in DWARF info lines.

    lines is an iterable of text lines as printed by objdump --dwarf=info.
    """
    found = False
    name = None
    size = None
    for line in lines:
        m = _DWARF_PATTERN.match(line)
        if not m:
            continue

        if m.group('tag'):
            # a new DIE starts, so the previous one is complete
            if name is not None and size is not None:
                yield name, size
            found = (m.group('tag') == 'structure_type')
            name = None
            size = None
        elif found and m.group('name'):
            name = m.group('name')
        elif found and name and m.group('size'):
            size = int(m.group('size'))

    # the last DIE has no following DW_TAG line to flush it
    if name is not None and size is not None:
        yield name, size


def _read_results(path):
    """Load (file, struct, size) tuples from a CSV written by this script."""
    with open(path, newline='') as f:
        return [
            (row['file'], row['struct'], int(row['struct_size']))
            for row in csv.DictReader(f)]


def collect(paths, **args):
    """Find struct sizes in object files by parsing their DWARF info.

    paths is a list of object files. Recognized keyword args:
    objdump_tool (argv prefix as a list, defaults to ['objdump']),
    verbose (echo the commands that are run), and build_dir (prefix
    stripped from the reported file names).

    Returns a list of (file, struct, size) tuples. If the same struct name
    shows up more than once in a file, the last size seen wins. Exits the
    process with -1 if objdump fails.
    """
    # note objdump-tool may contain extra args
    objdump_tool = args.get('objdump_tool') or ['objdump']

    results = {}
    for path in paths:
        cmd = objdump_tool + ['--dwarf=info', path]
        if args.get('verbose'):
            print(' '.join(shlex.quote(c) for c in cmd))
        # run() drains stdout and stderr together, so a chatty stderr
        # can't block objdump while we are still reading stdout
        proc = sp.run(cmd,
            stdout=sp.PIPE,
            stderr=sp.PIPE if not args.get('verbose') else None,
            universal_newlines=True)
        if proc.returncode != 0:
            if not args.get('verbose'):
                sys.stdout.write(proc.stderr)
            sys.exit(-1)

        for name, size in _parse_structs(proc.stdout.splitlines()):
            results[(path, name)] = size

    flat_results = []
    for (file, struct), size in results.items():
        # map to source files
        if args.get('build_dir'):
            file = re.sub('%s/*' % re.escape(args['build_dir']), '', file)
        flat_results.append((file, struct, size))

    return flat_results


def main(**args):
    """Collect struct sizes (or load them from CSV) and report them.

    Keyword args mirror the command line options declared at the bottom of
    this file: obj_paths, use, diff, output, all, size_sort,
    reverse_size_sort, files, summary, quiet, verbose, objdump_tool and
    build_dir. Exits the process with -1 if no object files are found.
    """
    # find sizes
    if not args.get('use'):
        obj_paths = args.get('obj_paths')
        if obj_paths is None:
            obj_paths = OBJ_PATHS

        # find .o files
        paths = []
        for path in obj_paths:
            if os.path.isdir(path):
                path = path + '/*.o'
            paths.extend(glob.glob(path))

        if not paths:
            print('no .obj files found in %r?' % obj_paths)
            sys.exit(-1)

        results = collect(paths, **args)
    else:
        results = _read_results(args['use'])

    total = sum(size for _, _, size in results)

    # find previous results?
    if args.get('diff'):
        prev_results = _read_results(args['diff'])
        prev_total = sum(size for _, _, size in prev_results)

    # write results to CSV
    if args.get('output'):
        with open(args['output'], 'w', newline='') as f:
            w = csv.writer(f)
            w.writerow(['file', 'struct', 'struct_size'])
            for file, struct, size in sorted(results):
                w.writerow((file, struct, size))

    # print results
    def dedup_entries(results, by='struct'):
        # sum sizes per file or per struct name
        entries = co.defaultdict(lambda: 0)
        for file, struct, size in results:
            entry = (file if by == 'file' else struct)
            entries[entry] += size
        return entries

    def diff_entries(olds, news):
        # name -> (old, new, new-old, ratio), entries with no old size
        # get a ratio of 1.0 so they are always shown
        diff = co.defaultdict(lambda: (0, 0, 0, 0))
        for name, new in news.items():
            diff[name] = (0, new, new, 1.0)
        for name, old in olds.items():
            _, new, _, _ = diff[name]
            diff[name] = (old, new, new-old, (new-old)/old if old else 1.0)
        return diff

    def sorted_entries(entries):
        if args.get('size_sort'):
            return sorted(entries, key=lambda x: (-x[1], x))
        elif args.get('reverse_size_sort'):
            return sorted(entries, key=lambda x: (+x[1], x))
        else:
            return sorted(entries)

    def sorted_diff_entries(entries):
        if args.get('size_sort'):
            return sorted(entries, key=lambda x: (-x[1][1], x))
        elif args.get('reverse_size_sort'):
            return sorted(entries, key=lambda x: (+x[1][1], x))
        else:
            # biggest relative change first
            return sorted(entries, key=lambda x: (-x[1][3], x))

    def print_header(by=''):
        if not args.get('diff'):
            print('%-36s %7s' % (by, 'size'))
        else:
            print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff'))

    def print_entries(by='struct'):
        entries = dedup_entries(results, by=by)

        if not args.get('diff'):
            print_header(by=by)
            for name, size in sorted_entries(entries.items()):
                print("%-36s %7d" % (name, size))
        else:
            prev_entries = dedup_entries(prev_results, by=by)
            diff = diff_entries(prev_entries, entries)
            print_header(by='%s (%d added, %d removed)' % (by,
                sum(1 for old, _, _, _ in diff.values() if not old),
                sum(1 for _, new, _, _ in diff.values() if not new)))
            for name, (old, new, change, ratio) in sorted_diff_entries(
                    diff.items()):
                if ratio or args.get('all'):
                    print("%-36s %7s %7s %+7d%s" % (name,
                        old or "-",
                        new or "-",
                        change,
                        ' (%+.1f%%)' % (100*ratio) if ratio else ''))

    def print_totals():
        if not args.get('diff'):
            print("%-36s %7d" % ('TOTAL', total))
        else:
            ratio = (total-prev_total)/prev_total if prev_total else 1.0
            print("%-36s %7s %7s %+7d%s" % (
                'TOTAL',
                prev_total if prev_total else '-',
                total if total else '-',
                total-prev_total,
                ' (%+.1f%%)' % (100*ratio) if ratio else ''))

    if args.get('quiet'):
        pass
    elif args.get('summary'):
        print_header()
        print_totals()
    elif args.get('files'):
        print_entries(by='file')
        print_totals()
    else:
        print_entries(by='struct')
        print_totals()


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description="Find struct sizes.")
    parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
        help="Description of where to find *.o files. May be a directory "
            "or a list of paths. Defaults to %r." % OBJ_PATHS)
    parser.add_argument('-v', '--verbose', action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument('-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument('-u', '--use',
        help="Don't compile and find struct sizes, instead use this CSV file.")
    parser.add_argument('-d', '--diff',
        help="Specify CSV file to diff struct size against.")
    parser.add_argument('-a', '--all', action='store_true',
        help="Show all structs, not just the ones that changed.")
    parser.add_argument('-A', '--everything', action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument('-s', '--size-sort', action='store_true',
        help="Sort by size.")
    parser.add_argument('-S', '--reverse-size-sort', action='store_true',
        help="Sort by size, but backwards.")
    parser.add_argument('--files', action='store_true',
        help="Show file-level struct sizes.")
    parser.add_argument('--summary', action='store_true',
        help="Only show the total struct size.")
    parser.add_argument('-q', '--quiet', action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument('--objdump-tool', default=['objdump'],
        type=lambda x: x.split(),
        help="Path to the objdump tool to use.")
    parser.add_argument('--build-dir',
        help="Specify the relative build directory. Used to map object "
            "files to the correct source files.")
    sys.exit(main(**vars(parser.parse_args())))