From 55b3c538d55ef53b9871e6ba41376a2ceecc302a Mon Sep 17 00:00:00 2001 From: Christopher Haster Date: Mon, 7 Mar 2022 00:24:30 -0600 Subject: [PATCH] Added ./scripts/summary.py A full summary of static measurements (code size, stack usage, etc) can now be found with: make summary This is done through the combination of a new ./scripts/summary.py script and the ability of existing scripts to merge into existing csv files, allowing multiple results to be merged either in a pipeline, or in parallel with a single ./scripts/summary.py call. The ./scripts/summary.py script can also be used to quickly compare different builds or configurations. This is a proper implementation of a similar but hacky shell script that has already been very useful for making optimization decisions: $ ./scripts/structs.py new.csv -d old.csv --summary name (2 added, 0 removed) code stack structs TOTAL 28648 (-2.7%) 2448 1012 Also some other small tweaks to scripts: - Removed state saving diff rules. This isn't the most useful way to handle comparing changes. - Added short flags for --summary (-Y) and --files (-F), since these are quite often used. 
--- Makefile | 34 +++--- scripts/code.py | 73 ++++++++--- scripts/coverage.py | 69 ++++++++--- scripts/data.py | 72 ++++++++--- scripts/stack.py | 77 +++++++++--- scripts/structs.py | 75 +++++++++--- scripts/summary.py | 290 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 580 insertions(+), 110 deletions(-) create mode 100755 scripts/summary.py diff --git a/Makefile b/Makefile index 46773b7..bcd6f0e 100644 --- a/Makefile +++ b/Makefile @@ -56,6 +56,9 @@ endif ifdef EXEC override TESTFLAGS += --exec="$(EXEC)" endif +ifdef COVERAGE +override TESTFLAGS += --coverage +endif ifdef BUILDDIR override TESTFLAGS += --build-dir="$(BUILDDIR:/=)" override CALLSFLAGS += --build-dir="$(BUILDDIR:/=)" @@ -104,41 +107,34 @@ test%: tests/test$$(firstword $$(subst \#, ,%)).toml code: $(OBJ) ./scripts/code.py $^ -S $(CODEFLAGS) -.PHONY: code-diff -code-diff: $(OBJ) - ./scripts/code.py $^ -d $(TARGET).code.csv -o $(TARGET).code.csv $(CODEFLAGS) - .PHONY: data data: $(OBJ) ./scripts/data.py $^ -S $(DATAFLAGS) -.PHONY: data-diff -data-diff: $(OBJ) - ./scripts/data.py $^ -d $(TARGET).data.csv -o $(TARGET).data.csv $(DATAFLAGS) - .PHONY: stack stack: $(CGI) ./scripts/stack.py $^ -S $(STACKFLAGS) -.PHONY: stack-diff -stack-diff: $(CGI) - ./scripts/stack.py $^ -d $(TARGET).stack.csv -o $(TARGET).stack.csv $(STACKFLAGS) - .PHONY: structs structs: $(OBJ) ./scripts/structs.py $^ -S $(STRUCTSFLAGS) -.PHONY: structs-diff -structs-diff: $(OBJ) - ./scripts/structs.py $^ -d $(TARGET).structs.csv -o $(TARGET).structs.csv $(STRUCTSFLAGS) - .PHONY: coverage coverage: ./scripts/coverage.py $(BUILDDIR)tests/*.toml.info -s $(COVERAGEFLAGS) -.PHONY: coverage-diff -coverage-diff: - ./scripts/coverage.py $(BUILDDIR)tests/*.toml.info $(COVERAGEFLAGS) +.PHONY: summary +summary: $(OBJ) $(CGI) + $(strip \ + ./scripts/code.py $(OBJ) -q -o - $(CODEFLAGS) \ + | ./scripts/data.py $(OBJ) -q -m - -o - $(DATAFLAGS) \ + | ./scripts/stack.py $(CGI) -q -m - -o - $(STACKFLAGS) \ + | ./scripts/structs.py 
$(OBJ) -q -m - -o - $(STRUCTFLAGS) \ + $(if $(COVERAGE),\ + | ./scripts/coverage.py $(BUILDDIR)tests/*.toml.info \ + -q -m - -o - $(COVERAGEFLAGS)) \ + | ./scripts/summary.py $(SUMMARYFLAGS)) + # rules -include $(DEP) diff --git a/scripts/code.py b/scripts/code.py index 17be08a..73589c1 100755 --- a/scripts/code.py +++ b/scripts/code.py @@ -48,17 +48,30 @@ def collect(paths, **args): # map to source files if args.get('build_dir'): file = re.sub('%s/*' % re.escape(args['build_dir']), '', file) + # replace .o with .c, different scripts report .o/.c, we need to + # choose one if we want to deduplicate csv files + file = re.sub('\.o$', '.c', file) # discard internal functions if not args.get('everything'): if func.startswith('__'): continue # discard .8449 suffixes created by optimizer func = re.sub('\.[0-9]+', '', func) + flat_results.append((file, func, size)) return flat_results def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find sizes if not args.get('use', None): # find .o files @@ -76,13 +89,14 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['function'], + result['name'], int(result['code_size'])) - for result in r] + for result in r + if result.get('code_size') not in {None, ''}] total = 0 for _, _, size in results: @@ -91,13 +105,14 @@ def main(**args): # find previous results? 
if args.get('diff'): try: - with open(args['diff']) as f: + with openio(args['diff']) as f: r = csv.DictReader(f) prev_results = [ ( result['file'], - result['function'], + result['name'], int(result['code_size'])) - for result in r] + for result in r + if result.get('code_size') not in {None, ''}] except FileNotFoundError: prev_results = [] @@ -107,14 +122,34 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'function', 'code_size']) - for file, func, size in sorted(results): - w.writerow((file, func, size)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('code_size', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, size in results: + merged_results[(file, func)]['code_size'] = size + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'code_size']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) # print results - def dedup_entries(results, by='function'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: 0) for file, func, size in results: entry = (file if by == 'file' else func) @@ -162,7 +197,7 @@ def main(**args): diff, ' (%+.1f%%)' % (100*ratio) if ratio else '')) - def print_entries(by='function'): + def print_entries(by='name'): entries = dedup_entries(results, by=by) if not args.get('diff'): @@ -201,7 +236,7 @@ def main(**args): print_entries(by='file') print_totals() else: - print_entries(by='function') + print_entries(by='name') print_totals() if __name__ == "__main__": @@ -214,12 +249,16 @@ if __name__ == 
"__main__": or a list of paths. Defaults to %r." % OBJ_PATHS) parser.add_argument('-v', '--verbose', action='store_true', help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") parser.add_argument('-o', '--output', help="Specify CSV file to store results.") parser.add_argument('-u', '--use', help="Don't compile and find code sizes, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff code size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") parser.add_argument('-A', '--everything', action='store_true', @@ -228,13 +267,11 @@ if __name__ == "__main__": help="Sort by size.") parser.add_argument('-S', '--reverse-size-sort', action='store_true', help="Sort by size, but backwards.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-F', '--files', action='store_true', help="Show file-level code sizes. Note this does not include padding! " "So sizes may differ from other tools.") - parser.add_argument('--summary', action='store_true', + parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total code size.") - parser.add_argument('-q', '--quiet', action='store_true', - help="Don't show anything, useful with -o.") parser.add_argument('--type', default='tTrRdD', help="Type of symbols to report, this uses the same single-character " "type-names emitted by nm. 
Defaults to %(default)r.") diff --git a/scripts/coverage.py b/scripts/coverage.py index 0790b8a..b3a90ed 100755 --- a/scripts/coverage.py +++ b/scripts/coverage.py @@ -66,6 +66,15 @@ def collect(paths, **args): def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find coverage if not args.get('use'): # find *.info files @@ -83,14 +92,16 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['function'], + result['name'], int(result['coverage_hits']), int(result['coverage_count'])) - for result in r] + for result in r + if result.get('coverage_hits') not in {None, ''} + if result.get('coverage_count') not in {None, ''}] total_hits, total_count = 0, 0 for _, _, hits, count in results: @@ -100,14 +111,16 @@ def main(**args): # find previous results? if args.get('diff'): try: - with open(args['diff']) as f: + with openio(args['diff']) as f: r = csv.DictReader(f) prev_results = [ ( result['file'], - result['function'], + result['name'], int(result['coverage_hits']), int(result['coverage_count'])) - for result in r] + for result in r + if result.get('coverage_hits') not in {None, ''} + if result.get('coverage_count') not in {None, ''}] except FileNotFoundError: prev_results = [] @@ -118,14 +131,36 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'function', 'coverage_hits', 'coverage_count']) - for file, func, hits, count in sorted(results): - w.writerow((file, func, hits, count)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? 
+ if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('coverage_hits', None) + result.pop('coverage_count', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, hits, count in results: + merged_results[(file, func)]['coverage_hits'] = hits + merged_results[(file, func)]['coverage_count'] = count + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'coverage_hits', 'coverage_count']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) # print results - def dedup_entries(results, by='function'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: (0, 0)) for file, func, hits, count in results: entry = (file if by == 'file' else func) @@ -197,7 +232,7 @@ def main(**args): '%+d/%+d' % (diff_hits, diff_count), ' (%+.1f%%)' % (100*ratio) if ratio else '')) - def print_entries(by='function'): + def print_entries(by='name'): entries = dedup_entries(results, by=by) if not args.get('diff'): @@ -245,7 +280,7 @@ def main(**args): print_entries(by='file') print_totals() else: - print_entries(by='function') + print_entries(by='name') print_totals() if __name__ == "__main__": @@ -266,6 +301,8 @@ if __name__ == "__main__": help="Don't do any work, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff code size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") parser.add_argument('-A', '--everything', action='store_true', @@ -274,9 +311,9 @@ if __name__ == "__main__": help="Sort by coverage.") 
parser.add_argument('-S', '--reverse-coverage-sort', action='store_true', help="Sort by coverage, but backwards.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-F', '--files', action='store_true', help="Show file-level coverage.") - parser.add_argument('--summary', action='store_true', + parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total coverage.") parser.add_argument('-q', '--quiet', action='store_true', help="Don't show anything, useful with -o.") diff --git a/scripts/data.py b/scripts/data.py index 5ef049e..ba87fac 100755 --- a/scripts/data.py +++ b/scripts/data.py @@ -48,6 +48,9 @@ def collect(paths, **args): # map to source files if args.get('build_dir'): file = re.sub('%s/*' % re.escape(args['build_dir']), '', file) + # replace .o with .c, different scripts report .o/.c, we need to + # choose one if we want to deduplicate csv files + file = re.sub('\.o$', '.c', file) # discard internal functions if not args.get('everything'): if func.startswith('__'): @@ -59,6 +62,15 @@ def collect(paths, **args): return flat_results def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find sizes if not args.get('use', None): # find .o files @@ -76,13 +88,14 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['function'], + result['name'], int(result['data_size'])) - for result in r] + for result in r + if result.get('data_size') not in {None, ''}] total = 0 for _, _, size in results: @@ -91,13 +104,14 @@ def main(**args): # find previous results? 
if args.get('diff'): try: - with open(args['diff']) as f: + with openio(args['diff']) as f: r = csv.DictReader(f) prev_results = [ ( result['file'], - result['function'], + result['name'], int(result['data_size'])) - for result in r] + for result in r + if result.get('data_size') not in {None, ''}] except FileNotFoundError: prev_results = [] @@ -107,14 +121,34 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'function', 'data_size']) - for file, func, size in sorted(results): - w.writerow((file, func, size)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('data_size', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, size in results: + merged_results[(file, func)]['data_size'] = size + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'data_size']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) # print results - def dedup_entries(results, by='function'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: 0) for file, func, size in results: entry = (file if by == 'file' else func) @@ -162,7 +196,7 @@ def main(**args): diff, ' (%+.1f%%)' % (100*ratio) if ratio else '')) - def print_entries(by='function'): + def print_entries(by='name'): entries = dedup_entries(results, by=by) if not args.get('diff'): @@ -201,7 +235,7 @@ def main(**args): print_entries(by='file') print_totals() else: - print_entries(by='function') + print_entries(by='name') print_totals() if __name__ == "__main__": @@ -214,12 +248,16 @@ if __name__ == 
"__main__": or a list of paths. Defaults to %r." % OBJ_PATHS) parser.add_argument('-v', '--verbose', action='store_true', help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") parser.add_argument('-o', '--output', help="Specify CSV file to store results.") parser.add_argument('-u', '--use', help="Don't compile and find data sizes, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff data size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") parser.add_argument('-A', '--everything', action='store_true', @@ -228,13 +266,11 @@ if __name__ == "__main__": help="Sort by size.") parser.add_argument('-S', '--reverse-size-sort', action='store_true', help="Sort by size, but backwards.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-F', '--files', action='store_true', help="Show file-level data sizes. Note this does not include padding! " "So sizes may differ from other tools.") - parser.add_argument('--summary', action='store_true', + parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total data size.") - parser.add_argument('-q', '--quiet', action='store_true', - help="Don't show anything, useful with -o.") parser.add_argument('--type', default='dDbB', help="Type of symbols to report, this uses the same single-character " "type-names emitted by nm. 
Defaults to %(default)r.") diff --git a/scripts/stack.py b/scripts/stack.py index cfa7ddb..0c652d8 100755 --- a/scripts/stack.py +++ b/scripts/stack.py @@ -116,6 +116,15 @@ def collect(paths, **args): return flat_results def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find sizes if not args.get('use', None): # find .ci files @@ -133,15 +142,17 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['function'], + result['name'], int(result['stack_frame']), float(result['stack_limit']), # note limit can be inf set()) - for result in r] + for result in r + if result.get('stack_frame') not in {None, ''} + if result.get('stack_limit') not in {None, ''}] total_frame = 0 total_limit = 0 @@ -152,15 +163,17 @@ def main(**args): # find previous results? if args.get('diff'): try: - with open(args['diff']) as f: + with openio(args['diff']) as f: r = csv.DictReader(f) prev_results = [ ( result['file'], - result['function'], + result['name'], int(result['stack_frame']), float(result['stack_limit']), set()) - for result in r] + for result in r + if result.get('stack_frame') not in {None, ''} + if result.get('stack_limit') not in {None, ''}] except FileNotFoundError: prev_results = [] @@ -172,14 +185,36 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'function', 'stack_frame', 'stack_limit']) - for file, func, frame, limit, _ in sorted(results): - w.writerow((file, func, frame, limit)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? 
+ if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('stack_frame', None) + result.pop('stack_limit', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, frame, limit, _ in results: + merged_results[(file, func)]['stack_frame'] = frame + merged_results[(file, func)]['stack_limit'] = limit + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'stack_frame', 'stack_limit']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) # print results - def dedup_entries(results, by='function'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: (0, 0, set())) for file, func, frame, limit, deps in results: entry = (file if by == 'file' else func) @@ -272,7 +307,7 @@ def main(**args): else ' (-∞%)' if ratio < 0 and m.isinf(ratio) else ' (%+.1f%%)' % (100*ratio))) - def print_entries(by='function'): + def print_entries(by='name'): # build optional tree of dependencies def print_deps(entries, depth, print, filter=lambda _: True, @@ -346,7 +381,7 @@ def main(**args): print_entries(by='file') print_totals() else: - print_entries(by='function') + print_entries(by='name') print_totals() @@ -360,12 +395,16 @@ if __name__ == "__main__": or a list of paths. Defaults to %r." 
% CI_PATHS) parser.add_argument('-v', '--verbose', action='store_true', help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") parser.add_argument('-o', '--output', help="Specify CSV file to store results.") parser.add_argument('-u', '--use', help="Don't parse callgraph files, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") parser.add_argument('-A', '--everything', action='store_true', @@ -374,19 +413,17 @@ if __name__ == "__main__": help="Sort by stack limit.") parser.add_argument('-S', '--reverse-limit-sort', action='store_true', help="Sort by stack limit, but backwards.") - parser.add_argument('-f', '--frame-sort', action='store_true', + parser.add_argument('--frame-sort', action='store_true', help="Sort by stack frame size.") - parser.add_argument('-F', '--reverse-frame-sort', action='store_true', + parser.add_argument('--reverse-frame-sort', action='store_true', help="Sort by stack frame size, but backwards.") parser.add_argument('-L', '--depth', default=0, type=lambda x: int(x, 0), nargs='?', const=float('inf'), help="Depth of dependencies to show.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-F', '--files', action='store_true', help="Show file-level calls.") - parser.add_argument('--summary', action='store_true', + parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total stack size.") - parser.add_argument('-q', '--quiet', action='store_true', - help="Don't show anything, useful with -o.") parser.add_argument('--build-dir', help="Specify the relative build directory. 
Used to map object files \ to the correct source files.") diff --git a/scripts/structs.py b/scripts/structs.py index d608fc9..2ec166b 100755 --- a/scripts/structs.py +++ b/scripts/structs.py @@ -62,11 +62,24 @@ def collect(paths, **args): # map to source files if args.get('build_dir'): file = re.sub('%s/*' % re.escape(args['build_dir']), '', file) + # replace .o with .c, different scripts report .o/.c, we need to + # choose one if we want to deduplicate csv files + file = re.sub('\.o$', '.c', file) + flat_results.append((file, struct, size)) return flat_results def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find sizes if not args.get('use', None): # find .o files @@ -84,13 +97,14 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['struct'], + result['name'], int(result['struct_size'])) - for result in r] + for result in r + if result.get('struct_size') not in {None, ''}] total = 0 for _, _, size in results: @@ -99,13 +113,14 @@ def main(**args): # find previous results? 
if args.get('diff'): try: - with open(args['diff']) as f: + with openio(args['diff']) as f: r = csv.DictReader(f) prev_results = [ ( result['file'], - result['struct'], + result['name'], int(result['struct_size'])) - for result in r] + for result in r + if result.get('struct_size') not in {None, ''}] except FileNotFoundError: prev_results = [] @@ -115,14 +130,34 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'struct', 'struct_size']) - for file, struct, size in sorted(results): - w.writerow((file, struct, size)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + struct = result.pop('name', '') + result.pop('struct_size', None) + merged_results[(file, struct)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, struct, size in results: + merged_results[(file, struct)]['struct_size'] = size + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'struct_size']) + w.writeheader() + for (file, struct), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': struct, **result}) # print results - def dedup_entries(results, by='struct'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: 0) for file, struct, size in results: entry = (file if by == 'file' else struct) @@ -170,7 +205,7 @@ def main(**args): diff, ' (%+.1f%%)' % (100*ratio) if ratio else '')) - def print_entries(by='struct'): + def print_entries(by='name'): entries = dedup_entries(results, by=by) if not args.get('diff'): @@ -209,25 +244,29 @@ def main(**args): print_entries(by='file') print_totals() else: - print_entries(by='struct') + print_entries(by='name') print_totals() if __name__ == "__main__": import argparse 
import sys parser = argparse.ArgumentParser( - description="Find code size at the function level.") + description="Find struct sizes.") parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS, help="Description of where to find *.o files. May be a directory \ or a list of paths. Defaults to %r." % OBJ_PATHS) parser.add_argument('-v', '--verbose', action='store_true', help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") parser.add_argument('-o', '--output', help="Specify CSV file to store results.") parser.add_argument('-u', '--use', help="Don't compile and find struct sizes, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff struct size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") parser.add_argument('-A', '--everything', action='store_true', @@ -236,12 +275,10 @@ if __name__ == "__main__": help="Sort by size.") parser.add_argument('-S', '--reverse-size-sort', action='store_true', help="Sort by size, but backwards.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-F', '--files', action='store_true', help="Show file-level struct sizes.") - parser.add_argument('--summary', action='store_true', + parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total struct size.") - parser.add_argument('-q', '--quiet', action='store_true', - help="Don't show anything, useful with -o.") parser.add_argument('--objdump-tool', default=['objdump'], type=lambda x: x.split(), help="Path to the objdump tool to use.") parser.add_argument('--build-dir', diff --git a/scripts/summary.py b/scripts/summary.py new file mode 100755 index 0000000..d9c9252 --- /dev/null +++ b/scripts/summary.py @@ -0,0 
+1,290 @@ +#!/usr/bin/env python3 +# +# Script to summarize the outputs of other scripts. Operates on CSV files. +# + +import functools as ft +import collections as co +import os +import csv +import re +import math as m + +# displayable fields +Field = co.namedtuple('Field', 'name,parse,acc,key,fmt,repr,null,ratio') +FIELDS = [ + # name, parse, accumulate, fmt, print, null + Field('code', + lambda r: int(r['code_size']), + sum, + lambda r: r, + '%7s', + lambda r: r, + '-', + lambda old, new: (new-old)/old), + Field('data', + lambda r: int(r['data_size']), + sum, + lambda r: r, + '%7s', + lambda r: r, + '-', + lambda old, new: (new-old)/old), + Field('stack', + lambda r: float(r['stack_limit']), + max, + lambda r: r, + '%7s', + lambda r: '∞' if m.isinf(r) else int(r), + '-', + lambda old, new: (new-old)/old), + Field('structs', + lambda r: int(r['struct_size']), + sum, + lambda r: r, + '%8s', + lambda r: r, + '-', + lambda old, new: (new-old)/old), + Field('coverage', + lambda r: (int(r['coverage_hits']), int(r['coverage_count'])), + lambda rs: ft.reduce(lambda a, b: (a[0]+b[0], a[1]+b[1]), rs), + lambda r: r[0]/r[1], + '%19s', + lambda r: '%11s %7s' % ('%d/%d' % (r[0], r[1]), '%.1f%%' % (100*r[0]/r[1])), + '%11s %7s' % ('-', '-'), + lambda old, new: ((new[0]/new[1]) - (old[0]/old[1]))) +] + + +def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + + # find results + results = co.defaultdict(lambda: {}) + for path in args.get('csv_paths', '-'): + try: + with openio(path) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + name = result.pop('name', '') + prev = results[(file, name)] + for field in FIELDS: + try: + r = field.parse(result) + if field.name in prev: + results[(file, name)][field.name] = field.acc( + [prev[field.name], r]) + else: + results[(file, 
name)][field.name] = r + except (KeyError, ValueError): + pass + except FileNotFoundError: + pass + + # find fields + if args.get('all_fields'): + fields = FIELDS + elif args.get('fields') is not None: + fields_dict = {field.name: field for field in FIELDS} + fields = [fields_dict[f] for f in args['fields']] + else: + fields = [] + for field in FIELDS: + if any(field.name in result for result in results.values()): + fields.append(field) + + # find total for every field + total = {} + for result in results.values(): + for field in fields: + if field.name in result and field.name in total: + total[field.name] = field.acc( + [total[field.name], result[field.name]]) + elif field.name in result: + total[field.name] = result[field.name] + + # find previous results? + if args.get('diff'): + prev_results = co.defaultdict(lambda: {}) + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + name = result.pop('name', '') + prev = prev_results[(file, name)] + for field in FIELDS: + try: + r = field.parse(result) + if field.name in prev: + prev_results[(file, name)][field.name] = field.acc( + [prev[field.name], r]) + else: + prev_results[(file, name)][field.name] = r + except (KeyError, ValueError): + pass + except FileNotFoundError: + pass + + if args.get('all_fields'): + fields = FIELDS + elif args.get('fields') is not None: + fields_dict = {field.name: field for field in FIELDS} + fields = [fields_dict[f] for f in args['fields']] + else: + fields = [] + for field in FIELDS: + if any(field.name in result for result in prev_results.values()): + fields.append(field) + + prev_total = {} + for result in prev_results.values(): + for field in fields: + if field.name in result and field.name in prev_total: + prev_total[field.name] = field.acc( + [prev_total[field.name], result[field.name]]) + elif field.name in result: + prev_total[field.name] = result[field.name] + + # print results + def dedup_entries(results, 
by='name'): + entries = co.defaultdict(lambda: {}) + for (file, func), result in results.items(): + entry = (file if by == 'file' else func) + prev = entries[entry] + for field in fields: + if field.name in result and field.name in prev: + entries[entry][field.name] = field.acc( + [prev[field.name], result[field.name]]) + elif field.name in result: + entries[entry][field.name] = result[field.name] + return entries + + def sorted_entries(entries): + if args.get('sort') is not None: + field = {field.name: field for field in FIELDS}[args['sort']] + return sorted(entries, key=lambda x: ( + -(field.key(x[1][field.name])) if field.name in x[1] else -1, x)) + elif args.get('reverse_sort') is not None: + field = {field.name: field for field in FIELDS}[args['reverse_sort']] + return sorted(entries, key=lambda x: ( + +(field.key(x[1][field.name])) if field.name in x[1] else -1, x)) + else: + return sorted(entries) + + def print_header(by=''): + if not args.get('diff'): + print('%-36s' % by, end='') + for field in fields: + print((' '+field.fmt) % field.name, end='') + print() + else: + print('%-36s' % by, end='') + for field in fields: + print((' '+field.fmt) % field.name, end='') + print(' %-9s' % '', end='') + print() + + def print_entry(name, result): + print('%-36s' % name, end='') + for field in fields: + r = result.get(field.name) + if r is not None: + print((' '+field.fmt) % field.repr(r), end='') + else: + print((' '+field.fmt) % '-', end='') + print() + + def print_diff_entry(name, old, new): + print('%-36s' % name, end='') + for field in fields: + n = new.get(field.name) + if n is not None: + print((' '+field.fmt) % field.repr(n), end='') + else: + print((' '+field.fmt) % '-', end='') + o = old.get(field.name) + ratio = ( + 0.0 if m.isinf(o or 0) and m.isinf(n or 0) + else +float('inf') if m.isinf(n or 0) + else -float('inf') if m.isinf(o or 0) + else 0.0 if not o and not n + else +1.0 if not o + else -1.0 if not n + else field.ratio(o, n)) + print(' %-9s' % ( + '' 
if not ratio + else '(+∞%)' if ratio > 0 and m.isinf(ratio) + else '(-∞%)' if ratio < 0 and m.isinf(ratio) + else '(%+.1f%%)' % (100*ratio)), end='') + print() + + def print_entries(by='name'): + entries = dedup_entries(results, by=by) + + if not args.get('diff'): + print_header(by=by) + for name, result in sorted_entries(entries.items()): + print_entry(name, result) + else: + prev_entries = dedup_entries(prev_results, by=by) + print_header(by='%s (%d added, %d removed)' % (by, + sum(1 for name in entries if name not in prev_entries), + sum(1 for name in prev_entries if name not in entries))) + for name, result in sorted_entries(entries.items()): + if args.get('all') or result != prev_entries.get(name, {}): + print_diff_entry(name, prev_entries.get(name, {}), result) + + def print_totals(): + if not args.get('diff'): + print_entry('TOTAL', total) + else: + print_diff_entry('TOTAL', prev_total, total) + + if args.get('summary'): + print_header() + print_totals() + elif args.get('files'): + print_entries(by='file') + print_totals() + else: + print_entries(by='name') + print_totals() + + +if __name__ == "__main__": + import argparse + import sys + parser = argparse.ArgumentParser( + description="Summarize measurements") + parser.add_argument('csv_paths', nargs='*', default='-', + help="Description of where to find *.csv files. May be a directory \ + or list of paths. 
*.csv files will be merged to show the total \ + coverage.") + parser.add_argument('-d', '--diff', + help="Specify CSV file to diff against.") + parser.add_argument('-a', '--all', action='store_true', + help="Show all objects, not just the ones that changed.") + parser.add_argument('-e', '--all-fields', action='store_true', + help="Show all fields, even those with no results.") + parser.add_argument('-f', '--fields', type=lambda x: re.split('\s*,\s*', x), + help="Comma separated list of fields to print, by default all fields \ + that are found in the CSV files are printed.") + parser.add_argument('-s', '--sort', + help="Sort by this field.") + parser.add_argument('-S', '--reverse-sort', + help="Sort by this field, but backwards.") + parser.add_argument('-F', '--files', action='store_true', + help="Show file-level calls.") + parser.add_argument('-Y', '--summary', action='store_true', + help="Only show the totals.") + sys.exit(main(**vars(parser.parse_args())))