Switched to lcov for coverage collection, greatly simplified coverage.py

Since we already have fairly complicated scripts, I figured it wouldn't
be too hard to use the gcov tools and directly parse their output. Boy
was I wrong.

The gcov intermediary format is a bit of a mess. In version 5.4, a
text-based intermediary format is written to a single .gcov file per
executable. This changed sometime before version 7.5, when it started
writing a separate .gcov file per .o file. And in version 9 this
intermediary format has been entirely replaced with an incompatible json
format!

Ironically, this means the internal-only .gcda/.gcno binary format has
actually been more stable than the intermediary format.

Also there's no way to avoid temporary .gcov files generated in the
project root, which risks messing with how test.py runs parallel tests.
Fortunately this looks like it will be fixed in gcov version 9.

---

Ended up switching to lcov, which was the right way to go. lcov handles
all of the gcov parsing, provides an easily parsable output, and even
provides a set of higher-level commands to manage coverage collection
from different runs.

Since this is all provided by lcov, I was able to simplify coverage.py
quite a bit. Now it just parses the .info files output by lcov.
This commit is contained in:
Christopher Haster
2021-01-01 23:35:16 -06:00
parent eeeceb9e30
commit 887f3660ed
3 changed files with 147 additions and 327 deletions

View File

@@ -8,211 +8,57 @@ import re
import collections as co
import bisect as b
RESULTDIR = 'results'
#RULES = """
#define FLATTEN
#%(sizedir)s/%(build)s.$(subst /,.,$(target)): $(target)
# ( echo "#line 1 \\"$$<\\"" ; %(cat)s $$< ) > $$@
#%(sizedir)s/%(build)s.$(subst /,.,$(target:.c=.size)): \\
# %(sizedir)s/%(build)s.$(subst /,.,$(target:.c=.o))
# $(NM) --size-sort $$^ | sed 's/^/$(subst /,\\/,$(target:.c=.o)):/' > $$@
#endef
#$(foreach target,$(SRC),$(eval $(FLATTEN)))
#
#-include %(sizedir)s/*.d
#.SECONDARY:
#
#%%.size: $(foreach t,$(subst /,.,$(OBJ:.o=.size)),%%.$t)
# cat $^ > $@
#"""
#CATS = {
# 'code': 'cat',
# 'code_inlined': 'sed \'s/^static\( inline\)\?//\'',
#}
#
#def build(**args):
# # mkdir -p sizedir
# os.makedirs(args['sizedir'], exist_ok=True)
#
# if args.get('inlined', False):
# builds = ['code', 'code_inlined']
# else:
# builds = ['code']
#
# # write makefiles for the different types of builds
# makefiles = []
# targets = []
# for build in builds:
# path = args['sizedir'] + '/' + build
# with open(path + '.mk', 'w') as mk:
# mk.write(RULES.replace(4*' ', '\t') % dict(
# sizedir=args['sizedir'],
# build=build,
# cat=CATS[build]))
# mk.write('\n')
#
# # pass on defines
# for d in args['D']:
# mk.write('%s: override CFLAGS += -D%s\n' % (
# path+'.size', d))
#
# makefiles.append(path + '.mk')
# targets.append(path + '.size')
#
# # build in parallel
# cmd = (['make', '-f', 'Makefile'] +
# list(it.chain.from_iterable(['-f', m] for m in makefiles)) +
# [target for target in targets])
# if args.get('verbose', False):
# print(' '.join(shlex.quote(c) for c in cmd))
# proc = sp.Popen(cmd,
# stdout=sp.DEVNULL if not args.get('verbose', False) else None)
# proc.wait()
# if proc.returncode != 0:
# sys.exit(-1)
#
# # find results
# build_results = co.defaultdict(lambda: 0)
# # notes
# # - filters type
# # - discards internal/debug functions (leading __)
# pattern = re.compile(
# '^(?P<file>[^:]+)' +
# ':(?P<size>[0-9a-fA-F]+)' +
# ' (?P<type>[%s])' % re.escape(args['type']) +
# ' (?!__)(?P<name>.+?)$')
# for build in builds:
# path = args['sizedir'] + '/' + build
# with open(path + '.size') as size:
# for line in size:
# match = pattern.match(line)
# if match:
# file = match.group('file')
# # discard .8449 suffixes created by optimizer
# name = re.sub('\.[0-9]+', '', match.group('name'))
# size = int(match.group('size'), 16)
# build_results[(build, file, name)] += size
#
# results = []
# for (build, file, name), size in build_results.items():
# if build == 'code':
# results.append((file, name, size, False))
# elif (build == 'code_inlined' and
# ('inlined', file, name) not in results):
# results.append((file, name, size, True))
#
# return results
INFO_PATHS = 'tests/*.toml.info'
def collect(covfuncs, covlines, path, **args):
with open(path) as f:
file = None
filter = args['filter'].split() if args.get('filter') else None
pattern = re.compile(
'^(?P<file>file'
':(?P<file_name>.*))' +
'|(?P<func>function' +
':(?P<func_lineno>[0-9]+)' +
',(?P<func_hits>[0-9]+)' +
',(?P<func_name>.*))' +
'|(?P<line>lcount' +
':(?P<line_lineno>[0-9]+)' +
',(?P<line_hits>[0-9]+))$')
for line in f:
match = pattern.match(line)
if match:
if match.group('file'):
file = match.group('file_name')
# filter?
if filter and file not in filter:
file = None
elif file is not None and match.group('func'):
lineno = int(match.group('func_lineno'))
name, hits = covfuncs[(file, lineno)]
covfuncs[(file, lineno)] = (
name or match.group('func_name'),
hits + int(match.group('func_hits')))
elif file is not None and match.group('line'):
lineno = int(match.group('line_lineno'))
covlines[(file, lineno)] += int(match.group('line_hits'))
def coverage(**args):
# find *.gcov files
gcovpaths = []
for gcovpath in args.get('gcovpaths') or [args['results']]:
if os.path.isdir(gcovpath):
gcovpath = gcovpath + '/*.gcov'
def collect(paths, **args):
file = None
funcs = []
lines = co.defaultdict(lambda: 0)
pattern = re.compile(
'^(?P<file>SF:/?(?P<file_name>.*))$'
'|^(?P<func>FN:(?P<func_lineno>[0-9]*),(?P<func_name>.*))$'
'|^(?P<line>DA:(?P<line_lineno>[0-9]*),(?P<line_hits>[0-9]*))$')
for path in paths:
with open(path) as f:
for line in f:
m = pattern.match(line)
if m and m.group('file'):
file = m.group('file_name')
elif m and file and m.group('func'):
funcs.append((file, int(m.group('func_lineno')),
m.group('func_name')))
elif m and file and m.group('line'):
lines[(file, int(m.group('line_lineno')))] += (
int(m.group('line_hits')))
for path in glob.glob(gcovpath):
gcovpaths.append(path)
if not gcovpaths:
print('no gcov files found in %r?'
% (args.get('gcovpaths') or [args['results']]))
sys.exit(-1)
# collect coverage info
covfuncs = co.defaultdict(lambda: (None, 0))
covlines = co.defaultdict(lambda: 0)
for path in gcovpaths:
collect(covfuncs, covlines, path, **args)
# merge? go ahead and handle that here, but
# with a copy so we only report on the current coverage
if args.get('merge', None):
if os.path.isfile(args['merge']):
accfuncs = covfuncs.copy()
acclines = covlines.copy()
collect(accfuncs, acclines, args['merge']) # don't filter!
# map line numbers to functions
funcs.sort()
def func_from_lineno(file, lineno):
i = b.bisect(funcs, (file, lineno))
if i and funcs[i-1][0] == file:
return funcs[i-1][2]
else:
accfuncs = covfuncs
acclines = covlines
return None
accfiles = sorted({file for file, _ in acclines.keys()})
accfuncs, i = sorted(accfuncs.items()), 0
acclines, j = sorted(acclines.items()), 0
with open(args['merge'], 'w') as f:
for file in accfiles:
f.write('file:%s\n' % file)
while i < len(accfuncs) and accfuncs[i][0][0] == file:
((_, lineno), (name, hits)) = accfuncs[i]
f.write('function:%d,%d,%s\n' % (lineno, hits, name))
i += 1
while j < len(acclines) and acclines[j][0][0] == file:
((_, lineno), hits) = acclines[j]
f.write('lcount:%d,%d\n' % (lineno, hits))
j += 1
# annotate?
if args.get('annotate', False):
# annotate(covlines, **args)
pass
# condense down to file/function results
funcs = sorted(covfuncs.items())
func_lines = [(file, lineno) for (file, lineno), _ in funcs]
func_names = [name for _, (name, _) in funcs]
def line_func(file, lineno):
i = b.bisect(func_lines, (file, lineno))
if i and func_lines[i-1][0] == file:
return func_names[i-1]
else:
return '???'
func_results = co.defaultdict(lambda: (0, 0))
for ((file, lineno), hits) in covlines.items():
func = line_func(file, lineno)
branch_hits, branches = func_results[(file, func)]
func_results[(file, func)] = (branch_hits + (hits > 0), branches + 1)
# reduce to function info
reduced_funcs = co.defaultdict(lambda: (0, 0))
for (file, line_lineno), line_hits in lines.items():
func = func_from_lineno(file, line_lineno)
if not func:
continue
hits, count = reduced_funcs[(file, func)]
reduced_funcs[(file, func)] = (hits + (line_hits > 0), count + 1)
results = []
for (file, func), (hits, branches) in func_results.items():
for (file, func), (hits, count) in reduced_funcs.items():
# discard internal/testing functions (test_* injected with
# internal testing)
if func == '???' or func.startswith('__') or func.startswith('test_'):
if func.startswith('__') or func.startswith('test_'):
continue
# discard .8449 suffixes created by optimizer
func = re.sub('\.[0-9]+', '', func)
results.append((file, func, hits, branches))
results.append((file, func, hits, count))
return results
@@ -220,7 +66,20 @@ def coverage(**args):
def main(**args):
# find coverage
if not args.get('input', None):
results = coverage(**args)
# find *.info files
paths = []
for path in args['info_paths']:
if os.path.isdir(path):
path = path + '/*.gcov'
for path in glob.glob(path, recursive=True):
paths.append(path)
if not paths:
print('no .info files found in %r?' % args['info_paths'])
sys.exit(-1)
results = collect(paths, **args)
else:
with open(args['input']) as f:
r = csv.DictReader(f)
@@ -228,13 +87,13 @@ def main(**args):
( result['file'],
result['function'],
int(result['hits']),
int(result['branches']))
int(result['count']))
for result in r]
total_hits, total_branches = 0, 0
for _, _, hits, branches in results:
total_hits, total_count = 0, 0
for _, _, hits, count in results:
total_hits += hits
total_branches += branches
total_count += count
# find previous results?
if args.get('diff', None):
@@ -244,51 +103,51 @@ def main(**args):
( result['file'],
result['function'],
int(result['hits']),
int(result['branches']))
int(result['count']))
for result in r]
prev_total_hits, prev_total_branches = 0, 0
for _, _, hits, branches in prev_results:
prev_total_hits, prev_total_count = 0, 0
for _, _, hits, count in prev_results:
prev_total_hits += hits
prev_total_branches += branches
prev_total_count += count
# write results to CSV
if args.get('output', None):
results.sort(key=lambda x: (-(x[2]/x[3]), -x[3], x))
results.sort(key=lambda x: (-(x[3]-x[2]), -x[3], x))
with open(args['output'], 'w') as f:
w = csv.writer(f)
w.writerow(['file', 'function', 'hits', 'branches'])
for file, func, hits, branches in results:
w.writerow((file, func, hits, branches))
w.writerow(['file', 'function', 'hits', 'count'])
for file, func, hits, count in results:
w.writerow((file, func, hits, count))
# print results
def dedup_entries(results, by='function'):
entries = co.defaultdict(lambda: (0, 0))
for file, func, hits, branches in results:
for file, func, hits, count in results:
entry = (file if by == 'file' else func)
entry_hits, entry_branches = entries[entry]
entries[entry] = (entry_hits + hits, entry_branches + branches)
entry_hits, entry_count = entries[entry]
entries[entry] = (entry_hits + hits, entry_count + count)
return entries
def diff_entries(olds, news):
diff = co.defaultdict(lambda: (None, None, None, None, None, None))
for name, (new_hits, new_branches) in news.items():
for name, (new_hits, new_count) in news.items():
diff[name] = (
0, 0,
new_hits, new_branches,
new_hits, new_branches)
for name, (old_hits, old_branches) in olds.items():
new_hits, new_count,
new_hits, new_count)
for name, (old_hits, old_count) in olds.items():
new_hits = diff[name][2] or 0
new_branches = diff[name][3] or 0
new_count = diff[name][3] or 0
diff[name] = (
old_hits, old_branches,
new_hits, new_branches,
new_hits-old_hits, new_branches-old_branches)
old_hits, old_count,
new_hits, new_count,
new_hits-old_hits, new_count-old_count)
return diff
def print_header(by=''):
if not args.get('diff', False):
print('%-36s %11s' % (by, 'branches'))
print('%-36s %11s' % (by, 'hits/count'))
else:
print('%-36s %11s %11s %11s' % (by, 'old', 'new', 'diff'))
@@ -297,11 +156,11 @@ def main(**args):
if not args.get('diff', None):
print_header(by=by)
for name, (hits, branches) in sorted(entries.items(),
key=lambda x: (-(x[1][0]-x[1][1]), -x[1][1], x)):
for name, (hits, count) in sorted(entries.items(),
key=lambda x: (-(x[1][1]-x[1][0]), -x[1][1], x)):
print("%-36s %11s (%.2f%%)" % (name,
'%d/%d' % (hits, branches),
100*(hits/branches if branches else 1.0)))
'%d/%d' % (hits, count),
100*(hits/count if count else 1.0)))
else:
prev_entries = dedup_entries(prev_results, by=by)
diff = diff_entries(prev_entries, entries)
@@ -309,49 +168,49 @@ def main(**args):
sum(1 for _, old, _, _, _, _ in diff.values() if not old),
sum(1 for _, _, _, new, _, _ in diff.values() if not new)))
for name, (
old_hits, old_branches,
new_hits, new_branches,
diff_hits, diff_branches) in sorted(diff.items(),
old_hits, old_count,
new_hits, new_count,
diff_hits, diff_count) in sorted(diff.items(),
key=lambda x: (
-(x[1][4]-x[1][5]), -x[1][5], -x[1][3], x)):
ratio = ((new_hits/new_branches if new_branches else 1.0)
- (old_hits/old_branches if old_branches else 1.0))
if diff_hits or diff_branches or args.get('all', False):
-(x[1][5]-x[1][4]), -x[1][5], -x[1][3], x)):
ratio = ((new_hits/new_count if new_count else 1.0)
- (old_hits/old_count if old_count else 1.0))
if diff_hits or diff_count or args.get('all', False):
print("%-36s %11s %11s %11s%s" % (name,
'%d/%d' % (old_hits, old_branches)
if old_branches else '-',
'%d/%d' % (new_hits, new_branches)
if new_branches else '-',
'%+d/%+d' % (diff_hits, diff_branches),
'%d/%d' % (old_hits, old_count)
if old_count else '-',
'%d/%d' % (new_hits, new_count)
if new_count else '-',
'%+d/%+d' % (diff_hits, diff_count),
' (%+.2f%%)' % (100*ratio) if ratio else ''))
def print_totals():
if not args.get('diff', None):
print("%-36s %11s (%.2f%%)" % ('TOTALS',
'%d/%d' % (total_hits, total_branches),
100*(total_hits/total_branches if total_branches else 1.0)))
'%d/%d' % (total_hits, total_count),
100*(total_hits/total_count if total_count else 1.0)))
else:
ratio = ((total_hits/total_branches
if total_branches else 1.0)
- (prev_total_hits/prev_total_branches
if prev_total_branches else 1.0))
ratio = ((total_hits/total_count
if total_count else 1.0)
- (prev_total_hits/prev_total_count
if prev_total_count else 1.0))
print("%-36s %11s %11s %11s%s" % ('TOTALS',
'%d/%d' % (prev_total_hits, prev_total_branches),
'%d/%d' % (total_hits, total_branches),
'%d/%d' % (prev_total_hits, prev_total_count),
'%d/%d' % (total_hits, total_count),
'%+d/%+d' % (total_hits-prev_total_hits,
total_branches-prev_total_branches),
total_count-prev_total_count),
' (%+.2f%%)' % (100*ratio) if ratio else ''))
def print_status():
if not args.get('diff', None):
print("%d/%d (%.2f%%)" % (total_hits, total_branches,
100*(total_hits/total_branches if total_branches else 1.0)))
print("%d/%d (%.2f%%)" % (total_hits, total_count,
100*(total_hits/total_count if total_count else 1.0)))
else:
ratio = ((total_hits/total_branches
if total_branches else 1.0)
- (prev_total_hits/prev_total_branches
if prev_total_branches else 1.0))
print("%d/%d (%+.2f%%)" % (total_hits, total_branches,
ratio = ((total_hits/total_count
if total_count else 1.0)
- (prev_total_hits/prev_total_count
if prev_total_count else 1.0))
print("%d/%d (%+.2f%%)" % (total_hits, total_count,
(100*ratio) if ratio else ''))
if args.get('quiet', False):
@@ -373,25 +232,10 @@ if __name__ == "__main__":
import sys
parser = argparse.ArgumentParser(
description="Show/manipulate coverage info")
parser.add_argument('gcovpaths', nargs='*',
help="Description of *.gcov files to use for coverage info. May be \
a directory or list of files. Coverage files will be merged to \
show the total coverage. Defaults to \"%s\"." % RESULTDIR)
parser.add_argument('--results', default=RESULTDIR,
help="Directory to store results. Created implicitly. Used if \
annotated files are requested. Defaults to \"%s\"." % RESULTDIR)
parser.add_argument('--merge',
help="Merge coverage info into the specified file, writing the \
cumulative coverage info to the file. The output from this script \
does not include the coverage from the merge file.")
parser.add_argument('--filter',
help="Specify files with care about, all other coverage info (system \
headers, test framework, etc) will be discarded.")
parser.add_argument('--annotate', action='store_true',
help="Output annotated source files into the result directory. Each \
line will be annotated with the number of hits during testing. \
This is useful for finding out which lines do not have test \
coverage.")
parser.add_argument('info_paths', nargs='*', default=[INFO_PATHS],
help="Description of where to find *.info files. May be a directory \
or list of paths. *.info files will be merged to show the total \
coverage. Defaults to \"%s\"." % INFO_PATHS)
parser.add_argument('-v', '--verbose', action='store_true',
help="Output commands that run behind the scenes.")
parser.add_argument('-i', '--input',

View File

@@ -21,7 +21,6 @@ import errno
import signal
TESTDIR = 'tests'
RESULTDIR = 'results' # only used for coverage
RULES = """
define FLATTEN
%(path)s%%$(subst /,.,$(target)): $(target)
@@ -35,22 +34,27 @@ $(foreach target,$(SRC),$(eval $(FLATTEN)))
%(path)s.test: %(path)s.test.o $(foreach t,$(subst /,.,$(OBJ)),%(path)s.$t)
$(CC) $(CFLAGS) $^ $(LFLAGS) -o $@
"""
COVERAGE_TEST_RULES = """
COVERAGE_RULES = """
%(path)s.test: override CFLAGS += -fprofile-arcs -ftest-coverage
# delete lingering coverage info during build
%(path)s.test: | %(path)s.test.clean
.PHONY: %(path)s.test.clean
%(path)s.test.clean:
# delete lingering coverage
%(path)s.test: | %(path)s.info.clean
.PHONY: %(path)s.clean
%(path)s.clean:
rm -f %(path)s*.gcda
override TEST_GCDAS += %(path)s*.gcda
"""
COVERAGE_RESULT_RULES = """
# dependencies defined in test makefiles
.PHONY: %(results)s/coverage.gcov
%(results)s/coverage.gcov: $(patsubst %%,%%.gcov,$(wildcard $(TEST_GCDAS)))
./scripts/coverage.py -s $^ --filter="$(SRC)" --merge=$@
# accumulate coverage info
.PHONY: %(path)s.info
%(path)s.info:
$(strip $(LCOV) -c \\
$(addprefix -d ,$(wildcard %(path)s*.gcda)) \\
--rc 'geninfo_adjust_src_path=$(shell pwd)' \\
-o $@)
$(LCOV) -e $@ $(addprefix /,$(SRC)) -o $@
.PHONY: %(path)s.cumul.info
%(path)s.cumul.info: %(path)s.info
$(LCOV) -a $< $(addprefix -a ,$(wildcard $@)) -o $@
"""
GLOBALS = """
//////////////// AUTOGENERATED TEST ////////////////
@@ -539,8 +543,7 @@ class TestSuite:
# add coverage hooks?
if args.get('coverage', False):
mk.write(COVERAGE_TEST_RULES.replace(4*' ', '\t') % dict(
results=args['results'],
mk.write(COVERAGE_RULES.replace(4*' ', '\t') % dict(
path=self.path))
mk.write('\n')
@@ -749,40 +752,14 @@ def main(**args):
failed += 1
if args.get('coverage', False):
# mkdir -p resultdir
os.makedirs(args['results'], exist_ok=True)
# collect coverage info
hits, branches = 0, 0
with open(args['results'] + '/coverage.mk', 'w') as mk:
mk.write(COVERAGE_RESULT_RULES.replace(4*' ', '\t') % dict(
results=args['results']))
cmd = (['make', '-f', 'Makefile'] +
list(it.chain.from_iterable(['-f', m] for m in makefiles)) +
['-f', args['results'] + '/coverage.mk',
args['results'] + '/coverage.gcov'])
mpty, spty = pty.openpty()
[re.sub('\.test$', '.cumul.info', target) for target in targets])
if args.get('verbose', False):
print(' '.join(shlex.quote(c) for c in cmd))
proc = sp.Popen(cmd, stdout=spty)
os.close(spty)
mpty = os.fdopen(mpty, 'r', 1)
while True:
try:
line = mpty.readline()
except OSError as e:
if e.errno == errno.EIO:
break
raise
if args.get('verbose', False):
sys.stdout.write(line)
# get coverage status
m = re.match('^TOTALS +([0-9]+)/([0-9]+)', line)
if m:
hits = int(m.group(1))
branches = int(m.group(2))
proc = sp.Popen(cmd,
stdout=sp.DEVNULL if not args.get('verbose', False) else None)
proc.wait()
if proc.returncode != 0:
sys.exit(-3)
@@ -803,9 +780,6 @@ def main(**args):
100*(passed/total if total else 1.0)))
print('tests failed %d/%d (%.2f%%)' % (failed, total,
100*(failed/total if total else 1.0)))
if args.get('coverage', False):
print('coverage %d/%d (%.2f%%)' % (hits, branches,
100*(hits/branches if branches else 1.0)))
return 1 if failed > 0 else 0
if __name__ == "__main__":
@@ -818,9 +792,6 @@ if __name__ == "__main__":
directory of tests, a specific file, a suite by name, and even a \
specific test case by adding brackets. For example \
\"test_dirs[0]\" or \"{0}/test_dirs.toml[0]\".".format(TESTDIR))
parser.add_argument('--results', default=RESULTDIR,
help="Directory to store results. Created implicitly. Only used in \
this script for coverage information if --coverage is provided.")
parser.add_argument('-D', action='append', default=[],
help="Overriding parameter definitions.")
parser.add_argument('-v', '--verbose', action='store_true',
@@ -848,8 +819,8 @@ if __name__ == "__main__":
parser.add_argument('--disk',
help="Specify a file to use for persistent/reentrant tests.")
parser.add_argument('--coverage', action='store_true',
help="Collect coverage information across tests. This is stored in \
the results directory. Coverage is not reset between runs \
allowing multiple test runs to contribute to coverage \
information.")
help="Collect coverage information during testing. This uses lcov/gcov \
to accumulate coverage information into *.info files. Note \
coverage is not reset between runs, allowing multiple runs to \
contribute to coverage.")
sys.exit(main(**vars(parser.parse_args())))