From 8475c8064d2cccba1790ebeb134b283d0ac912e8 Mon Sep 17 00:00:00 2001 From: Christopher Haster Date: Mon, 14 Mar 2022 00:35:43 -0500 Subject: [PATCH] Limit ./scripts/structs.py to report structs in local .h files This requires parsing an additional section of the dwarfinfo (--dwarf=rawline) to get the declaration file info. --- Interpreting the results of ./scripts/structs.py reporting is a bit more complicated than other scripts, structs aren't used in a consistent manner so the cost of a large struct depends on the context in which it is used. But that being said, there really isn't much reason to report internal-only structs. These structs really only exist for type-checking in internal algorithms, and their cost will end up reflected in other RAM measurements, either stack, heap, or other. --- scripts/structs.py | 63 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/scripts/structs.py b/scripts/structs.py index e56ce9d..dfa65dd 100755 --- a/scripts/structs.py +++ b/scripts/structs.py @@ -16,16 +16,48 @@ import collections as co OBJ_PATHS = ['*.o'] def collect(paths, **args): - results = co.defaultdict(lambda: 0) - pattern = re.compile( + decl_pattern = re.compile( + '^\s+(?P<no>[0-9]+)' + '\s+(?P<dir>[0-9]+)' + '\s+.*' + '\s+(?P<file>[^\s]+)$') + struct_pattern = re.compile( '^(?:.*DW_TAG_(?P<tag>[a-z_]+).*' '|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*' + '|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*' '|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$') + results = co.defaultdict(lambda: 0) for path in paths: + # find decl, we want to filter by structs in .h files + decls = {} + # note objdump-tool may contain extra args + cmd = args['objdump_tool'] + ['--dwarf=rawline', path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace') + for line in proc.stdout: + # find file numbers + m = 
decl_pattern.match(line) + if m: + decls[int(m.group('no'))] = ( + m.group('file'), + int(m.group('dir'))) + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + sys.exit(-1) + # collect structs as we parse dwarf info found = False name = None + decl = None size = None # note objdump-tool may contain extra args @@ -39,16 +71,22 @@ def collect(paths, **args): errors='replace') for line in proc.stdout: # state machine here to find structs - m = pattern.match(line) + m = struct_pattern.match(line) if m: if m.group('tag'): - if name is not None and size is not None: - results[(path, name)] = size + if (name is not None + and decl is not None + and size is not None): + decl_file, decl_dir = decls.get(decl, ('', 0)) + results[(path, name)] = (size, decl_file, decl_dir) found = (m.group('tag') == 'structure_type') name = None + decl = None size = None elif found and m.group('name'): name = m.group('name') + elif found and name and m.group('decl'): + decl = int(m.group('decl')) elif found and name and m.group('size'): size = int(m.group('size')) proc.wait() @@ -59,18 +97,25 @@ def collect(paths, **args): sys.exit(-1) flat_results = [] - for (file, struct), size in results.items(): + for (path, struct), (size, decl_file, decl_dir) in results.items(): # map to source files if args.get('build_dir'): - file = re.sub('%s/*' % re.escape(args['build_dir']), '', file) + path = re.sub('%s/*' % re.escape(args['build_dir']), '', path) + # only include structs declared in header files in the current + # directory, ignore internal-only # structs (these are represented + # in other measurements) + if not args.get('everything'): + if not (decl_file.endswith('.h') and decl_dir == 0): + continue # replace .o with .c, different scripts report .o/.c, we need to # choose one if we want to deduplicate csv files - file = re.sub('\.o$', '.c', file) + path = re.sub('\.o$', '.c', path) - flat_results.append((file, struct, size)) 
+ flat_results.append((path, struct, size)) return flat_results + def main(**args): def openio(path, mode='r'): if path == '-':