From 9453ebd15d590778a5207920b2671a4fff91a9e0 Mon Sep 17 00:00:00 2001 From: Christopher Haster Date: Sat, 18 Jan 2020 20:20:43 -0600 Subject: [PATCH] Added/improved disk-reading debug scripts Also fixed a bug in dir splitting when there's a large number of open files, which was the main reason I was trying to make it easier to debug disk images. One part of the recent test changes was to move away from the file-per-block emubd and instead simulate storage with a single contiguous file. The file-per-block format was marginally useful at the beginning, but as the remaining bugs get more subtle, it becomes more useful to inspect littlefs through scripts that make the underlying metadata more human-readable. The key benefit of switching to a contiguous file is these same scripts can be reused for real disk images and can even read through /dev/sdb or similar. - ./scripts/readblock.py disk block_size block off data 00000000: 71 01 00 00 f0 0f ff f7 6c 69 74 74 6c 65 66 73 q.......littlefs 00000010: 2f e0 00 10 00 00 02 00 00 02 00 00 00 04 00 00 /............... 00000020: ff 00 00 00 ff ff ff 7f fe 03 00 00 20 00 04 19 ............... 00000030: 61 00 00 0c 00 62 20 30 0c 09 a0 01 00 00 64 00 a....b 0......d. ... readblock.py prints a hex dump of a given block on disk. It's basically just "dd if=disk bs=block_size count=1 skip=block | xxd -g1 -" but with less typing. - ./scripts/readmdir.py disk block_size block1 block2 off tag type id len data (truncated) 0000003b: 0020000a dir 0 10 63 6f 6c 64 63 6f 66 66 coldcoff 00000049: 20000008 dirstruct 0 8 02 02 00 00 03 02 00 00 ........ 00000008: 00200409 dir 1 9 68 6f 74 63 6f 66 66 65 hotcoffe 00000015: 20000408 dirstruct 1 8 fe 01 00 00 ff 01 00 00 ........ readmdir.py prints info about the tags in a metadata pair on disk. It can print the currently active tags as well as the raw log of the metadata pair. 
- ./scripts/readtree.py disk block_size superblock "littlefs" version v2.0 block_size 512 block_count 1024 name_max 255 file_max 2147483647 attr_max 1022 gstate 0x000000000000000000000000 dir "/" mdir {0x0, 0x1} rev 3 v id 0 superblock "littlefs" inline size 24 mdir {0x77, 0x78} rev 1 id 0 dir "coffee" dir {0x1fc, 0x1fd} dir "/coffee" mdir {0x1fd, 0x1fc} rev 2 id 0 dir "coldcoffee" dir {0x202, 0x203} id 1 dir "hotcoffee" dir {0x1fe, 0x1ff} dir "/coffee/coldcoffee" mdir {0x202, 0x203} rev 1 dir "/coffee/warmcoffee" mdir {0x200, 0x201} rev 1 readtree.py parses the littlefs tree and prints info about the semantics of what's on disk. This includes the superblock, global-state, and directories/metadata-pairs. It doesn't print the filesystem tree though, that could be a different tool. --- lfs.c | 2 +- scripts/readblock.py | 26 +++ scripts/readmdir.py | 332 ++++++++++++++++++++++++++++++++++ scripts/readtree.py | 273 ++++++++++++++++++++++++++++ scripts/test_.py | 4 +- tests_/test_exhaustion.toml | 8 +- tests_/test_interspersed.toml | 26 +-- tests_/test_seek.toml | 8 +- 8 files changed, 645 insertions(+), 34 deletions(-) create mode 100755 scripts/readblock.py create mode 100755 scripts/readmdir.py create mode 100755 scripts/readtree.py diff --git a/lfs.c b/lfs.c index 2758c29..e26d745 100644 --- a/lfs.c +++ b/lfs.c @@ -1825,7 +1825,7 @@ compact: // for things that are for (struct lfs_mlist *d = lfs->mlist; d; d = d->next) { if (lfs_pair_cmp(d->m.pair, copy.pair) == 0) { - d->m = *dir; + d->m = copy; if (d->id == lfs_tag_id(deletetag)) { d->m.pair[0] = LFS_BLOCK_NULL; d->m.pair[1] = LFS_BLOCK_NULL; diff --git a/scripts/readblock.py b/scripts/readblock.py new file mode 100755 index 0000000..817517b --- /dev/null +++ b/scripts/readblock.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 + +import subprocess as sp + +def main(args): + with open(args.disk, 'rb') as f: + f.seek(args.block * args.block_size) + block = (f.read(args.block_size) + .ljust(args.block_size, b'\xff')) + + # 
# Table of known tag types: name -> (mask, value).
# A tag's 11-bit type field matches a name when (type & mask) == value.
# Entries with mask 0x700 compare only the top 3 type bits, entries with
# mask 0x7ff compare the whole type (see Tag.is_ and Tag.typerepr).
TAG_TYPES = {
    'splice':       (0x700, 0x400),
    'create':       (0x7ff, 0x401),
    'delete':       (0x7ff, 0x4ff),
    'name':         (0x700, 0x000),
    'reg':          (0x7ff, 0x001),
    'dir':          (0x7ff, 0x002),
    'superblock':   (0x7ff, 0x0ff),
    'struct':       (0x700, 0x200),
    'dirstruct':    (0x7ff, 0x200),
    'ctzstruct':    (0x7ff, 0x202),
    'inlinestruct': (0x7ff, 0x201),
    'userattr':     (0x700, 0x300),
    'tail':         (0x700, 0x600),
    'softtail':     (0x7ff, 0x600),
    'hardtail':     (0x7ff, 0x601),
    'gstate':       (0x700, 0x700),
    'movestate':    (0x7ff, 0x7ff),
    'crc':          (0x700, 0x500),
}

class Tag:
    """A 32-bit metadata tag split into valid/type/id/size fields.

    Bit layout, as decoded by the properties below:
      bit  31     clear when the tag is valid
      bits 20-30  type (11 bits)
      bits 10-19  id   (10 bits)
      bits 0-9    size (10 bits)

    Construct with either one or three positional arguments:
      Tag(raw)             raw is the already-encoded 32-bit integer
      Tag(type, id, size)  type is an int or a key of TAG_TYPES; id and
                           size are ints or strings. String fields are
                           parsed with int(s, 0), so '8' and '0x8' both
                           work; 'x' or '.' selects the all-ones value
                           0x3ff.

    Any other number of arguments raises AssertionError.
    """

    def __init__(self, *args):
        if len(args) == 1:
            self.tag = args[0]
        elif len(args) == 3:
            type_, id_, size = args
            if isinstance(type_, str):
                # names resolve to the value half of the (mask, value) pair
                type_ = TAG_TYPES[type_][1]

            self.tag = (
                (type_ << 20)
                | (self._field(id_) << 10)
                | self._field(size))
        else:
            # raised explicitly (not via the assert statement) so the check
            # is not stripped under python -O; the exception type is the
            # same one the original `assert False` produced
            raise AssertionError(
                "Tag() takes 1 or 3 arguments, got %d" % len(args))

    @staticmethod
    def _field(value):
        """Convert an id/size constructor argument into an int.

        Non-strings are returned untouched. Strings are parsed with base
        auto-detection, except the wildcard spellings which map to 0x3ff.
        """
        if not isinstance(value, str):
            return value
        # substring test kept from the original code, so besides 'x' and
        # '.' the strings '' and 'x.' are treated as wildcards as well
        if value in 'x.':
            return 0x3ff
        # base 0 = infer the base from the literal's prefix; the size field
        # previously passed the `str` type here and raised TypeError
        return int(value, 0)

    @property
    def isvalid(self):
        """True when bit 31 is clear."""
        return not bool(self.tag & 0x80000000)

    @property
    def isattr(self):
        """True when bit 30 is clear."""
        return not bool(self.tag & 0x40000000)

    @property
    def iscompactable(self):
        """True when bit 29 is set."""
        return bool(self.tag & 0x20000000)

    @property
    def isunique(self):
        """True when bit 28 is clear."""
        return not bool(self.tag & 0x10000000)

    @property
    def type(self):
        """The full 11-bit type field."""
        return (self.tag & 0x7ff00000) >> 20

    @property
    def type1(self):
        """The type field with only its top 3 bits kept (0x000-0x700)."""
        return (self.tag & 0x70000000) >> 20

    @property
    def type3(self):
        """Same value as `type`: the full 11-bit type field."""
        return (self.tag & 0x7ff00000) >> 20

    @property
    def id(self):
        """The 10-bit id field."""
        return (self.tag & 0x000ffc00) >> 10

    @property
    def size(self):
        """The 10-bit size field."""
        return (self.tag & 0x000003ff) >> 0

    @property
    def dsize(self):
        """Bytes spanned by the tag: 4, plus `size` unless size is 0x3ff."""
        return 4 + (self.size if self.size != 0x3ff else 0)

    @property
    def chunk(self):
        """The low 8 bits of the type field."""
        return self.type & 0xff

    @property
    def schunk(self):
        """`chunk` reinterpreted as a signed 8-bit integer."""
        return struct.unpack('b', struct.pack('B', self.chunk))[0]

    def is_(self, type):
        """Return True if this tag's type matches the TAG_TYPES entry `type`.

        Raises KeyError if `type` is not a key of TAG_TYPES.
        """
        mask, value = TAG_TYPES[type]
        return (self.type & mask) == value

    def mkmask(self):
        """Build the mask Tag derived from this tag's isunique/isattr bits.

        The type field is 0x700 when isunique else 0x7ff, the id field is
        0x3ff when isattr else 0, and the size field is always 0.
        """
        return Tag(
            0x700 if self.isunique else 0x7ff,
            0x3ff if self.isattr else 0,
            0)

    def typerepr(self):
        """Return a human-readable name for this tag's type.

        A crc-type tag that carries a `crc` attribute other than 0xffffffff
        is reported as 'crc (bad)'. Otherwise the type is looked up in
        TAG_TYPES, ignoring progressively more low bits until an entry
        matches; ignored bits are appended in hex. Unknown types fall back
        to the bare hex type.
        """
        if self.is_('crc') and getattr(self, 'crc', 0xffffffff) != 0xffffffff:
            return 'crc (bad)'

        reverse_types = {v: k for k, v in TAG_TYPES.items()}
        for prefix in range(12):
            low = (1 << prefix) - 1
            mask = 0x7ff & ~low
            key = (mask, self.type & mask)
            if key in reverse_types:
                name = reverse_types[key]
                if prefix > 0:
                    return '%s %#0*x' % (name, prefix//4, self.type & low)
                return name

        return '%02x' % self.type

    def idrepr(self):
        """Return the id as text, or '.' when it is 0x3ff."""
        return repr(self.id) if self.id != 0x3ff else '.'

    def sizerepr(self):
        """Return the size as text, or 'x' when it is 0x3ff."""
        return repr(self.size) if self.size != 0x3ff else 'x'

    def __repr__(self):
        return 'Tag(%r, %d, %d)' % (self.typerepr(), self.id, self.size)

    def __lt__(self, other):
        # order by id first, then by type
        return (self.id, self.type) < (other.id, other.type)

    def __bool__(self):
        # a tag is truthy exactly when it is valid
        return self.isvalid

    def __int__(self):
        return self.tag

    def __index__(self):
        # lets a Tag be used directly with hex() and integer format codes
        return self.tag
+ if crc != 0xffffffff: + corrupt = True + if not corrupt: + self.log = self.all_.copy() + + # reset tag parsing + crc = 0 + tag = Tag(int(tag) ^ ((tag.type & 1) << 31)) + + # find most recent tags + self.tags = [] + for tag in self.log: + if tag.is_('crc') or tag.is_('splice'): + continue + + if tag in self and self[tag] is tag: + self.tags.append(tag) + + self.tags = sorted(self.tags) + + # and ids + self.ids = list(it.takewhile( + lambda id: Tag('name', id, 0) in self, + it.count())) + + def __bool__(self): + return bool(self.log) + + def __lt__(self, other): + # corrupt blocks don't count + if not self and other: + return True + + # use sequence arithmetic to avoid overflow + return not ((other.rev - self.rev) & 0x80000000) + + def __contains__(self, args): + try: + self[args] + return True + except KeyError: + return False + + def __getitem__(self, args): + if isinstance(args, tuple): + gmask, gtag = args + else: + gmask, gtag = args.mkmask(), args + + gdiff = 0 + for tag in reversed(self.log): + if (gmask.id != 0 and tag.is_('splice') and + tag.id <= gtag.id - gdiff): + if tag.is_('create') and tag.id == gtag.id - gdiff: + # creation point + break + + gdiff += tag.schunk + + if (int(gmask) & int(tag)) == (int(gmask) & int( + Tag(gtag.type, gtag.id - gdiff, gtag.size))): + if tag.size == 0x3ff: + # deleted + break + + return tag + + raise KeyError(gmask, gtag) + + def _dump_tags(self, tags, truncate=True): + sys.stdout.write("%-8s %-8s %-13s %4s %4s %s\n" % ( + 'off', 'tag', 'type', 'id', 'len', + 'data (truncated)' if truncate else 12*' '+'data')) + + for tag in tags: + sys.stdout.write("%08x: %08x %-13s %4s %4s" % ( + tag.off, tag, + tag.typerepr(), tag.idrepr(), tag.sizerepr())) + if truncate: + sys.stdout.write(" %-23s %-8s\n" % ( + ' '.join('%02x' % c for c in tag.data[:8]), + ''.join(c if c >= ' ' and c <= '~' else '.' 
def main(args):
    """Dump the tags of the metadata pair selected on the command line.

    Reads args.block1 and, when it is not None, args.block2 from the file
    args.disk, each as args.block_size bytes starting at
    block * args.block_size. Short reads are padded with 0xff bytes up to
    the block size. The blocks are handed to MetadataPair, and one of its
    dumps is printed: dump_all for args.all, dump_log for args.log,
    otherwise dump_tags. Output is truncated unless args.no_truncate is
    set.

    Returns 0 when the resulting MetadataPair is truthy, 1 otherwise.
    """
    images = []
    with open(args.disk, 'rb') as disk:
        for addr in (args.block1, args.block2):
            if addr is not None:
                disk.seek(addr * args.block_size)
                raw = disk.read(args.block_size)
                images.append(raw.ljust(args.block_size, b'\xff'))

    # MetadataPair picks the most recent of the blocks it is given
    mdir = MetadataPair(images)

    if args.all:
        dump = mdir.dump_all
    elif args.log:
        dump = mdir.dump_log
    else:
        dump = mdir.dump_tags
    dump(truncate=not args.no_truncate)

    if mdir:
        return 0
    return 1
def popc(x):
    """Return the number of '1' digits in the binary form of x."""
    return sum(1 for digit in bin(x) if digit == '1')

def ctz(x):
    """Return the number of trailing '0' characters in bin(x)."""
    digits = bin(x)
    stripped = digits.rstrip('0')
    return len(digits) - len(stripped)
+ for c in map(chr, data[i:i+16])))) + + for tag in mdir.tags: + if tag.id==id_ and tag.is_('userattr'): + f.write("id %d %s size %d\n" % ( + id_, tag.typerepr(), tag.size)) + + if args.data: + for i in range(0, len(tag.data), 16): + f.write(" %-47s %-16s\n" % ( + ' '.join('%02x' % c for c in tag.data[i:i+16]), + ''.join(c if c >= ' ' and c <= '~' else '.' + for c in map(chr, tag.data[i:i+16])))) + +def main(args): + with open(args.disk, 'rb') as f: + dirs = [] + superblock = None + gstate = b'' + mdirs = [] + tail = (args.block1, args.block2) + hard = False + while True: + # load mdir + data = [] + blocks = {} + for block in tail: + f.seek(block * args.block_size) + data.append(f.read(args.block_size) + .ljust(args.block_size, b'\xff')) + blocks[id(data[-1])] = block + mdir = MetadataPair(data) + mdir.blocks = tuple(blocks[id(p.data)] for p in mdir.pair) + + # fetch some key metadata as a we scan + try: + mdir.tail = mdir[Tag('tail', 0, 0)] + if mdir.tail.size != 8 or mdir.tail.data == 8*b'\xff': + mdir.tail = None + except KeyError: + mdir.tail = None + + # have superblock? + try: + nsuperblock = mdir[ + Tag(0x7ff, 0x3ff, 0), Tag('superblock', 0, 0)] + superblock = nsuperblock, mdir[Tag('inlinestruct', 0, 0)] + except KeyError: + pass + + # have gstate? 
+ try: + ngstate = mdir[Tag('movestate', 0, 0)] + gstate = bytes((a or 0) ^ (b or 0) + for a,b in it.zip_longest(gstate, ngstate.data)) + except KeyError: + pass + + # add to directories + mdirs.append(mdir) + if mdir.tail is None or not mdir.tail.is_('hardtail'): + dirs.append(mdirs) + mdirs = [] + + if mdir.tail is None: + break + + tail = struct.unpack(' 2*run_cycles[0]-run_cycles[0]/20); + // check we increased the lifetime by 2x with ~10% error + LFS_ASSERT(run_cycles[1] > 2*run_cycles[0]-run_cycles[0]/10); ''' [[case]] # wear-level test + expanding superblock @@ -336,6 +336,6 @@ exhausted: run_block_count[run], run_cycles[run]); } - // check we increased the lifetime by 2x with ~5% error - LFS_ASSERT(run_cycles[1] > 2*run_cycles[0]-run_cycles[0]/20); + // check we increased the lifetime by 2x with ~10% error + LFS_ASSERT(run_cycles[1] > 2*run_cycles[0]-run_cycles[0]/10); ''' diff --git a/tests_/test_interspersed.toml b/tests_/test_interspersed.toml index 32f79e7..87a0578 100644 --- a/tests_/test_interspersed.toml +++ b/tests_/test_interspersed.toml @@ -1,16 +1,7 @@ [[case]] # interspersed file test -# TODO FILES=26 found bug -#define.SIZE = [10, 100] -#define.FILES = [4, 10, 26] -define = [ - {SIZE=10, FILES=4}, - {SIZE=10, FILES=10}, - #{SIZE=10, FILES=26}, - {SIZE=100, FILES=4}, - {SIZE=100, FILES=10}, - #{SIZE=100, FILES=26}, -] +define.SIZE = [10, 100] +define.FILES = [4, 10, 26] code = ''' lfs_file_t files[FILES]; const char alphas[] = "abcdefghijklmnopqrstuvwxyz"; @@ -182,17 +173,8 @@ code = ''' ''' [[case]] # reentrant interspersed file test -# TODO FILES=26 found bug -#define.SIZE = [10, 100] -#define.FILES = [4, 10, 26] -define = [ - {SIZE=10, FILES=4}, - {SIZE=10, FILES=10}, - #{SIZE=10, FILES=26}, - {SIZE=100, FILES=4}, - #{SIZE=100, FILES=10}, - #{SIZE=100, FILES=26}, -] +define.SIZE = [10, 100] +define.FILES = [4, 10, 26] reentrant = true code = ''' lfs_file_t files[FILES]; diff --git a/tests_/test_seek.toml b/tests_/test_seek.toml index 
586ab71..79d7728 100644 --- a/tests_/test_seek.toml +++ b/tests_/test_seek.toml @@ -24,14 +24,14 @@ code = ''' lfs_mount(&lfs, &cfg) => 0; lfs_file_open(&lfs, &file, "kitty", LFS_O_RDONLY) => 0; - lfs_soff_t pos; + lfs_soff_t pos = -1; size = strlen("kittycatcat"); for (int i = 0; i < SKIP; i++) { lfs_file_read(&lfs, &file, buffer, size) => size; memcmp(buffer, "kittycatcat", size) => 0; pos = lfs_file_tell(&lfs, &file); } - pos >= 0 => 1; + assert(pos >= 0); lfs_file_seek(&lfs, &file, pos, LFS_SEEK_SET) => pos; lfs_file_read(&lfs, &file, buffer, size) => size; @@ -93,14 +93,14 @@ code = ''' lfs_mount(&lfs, &cfg) => 0; lfs_file_open(&lfs, &file, "kitty", LFS_O_RDWR) => 0; - lfs_soff_t pos; + lfs_soff_t pos = -1; size = strlen("kittycatcat"); for (int i = 0; i < SKIP; i++) { lfs_file_read(&lfs, &file, buffer, size) => size; memcmp(buffer, "kittycatcat", size) => 0; pos = lfs_file_tell(&lfs, &file); } - pos >= 0 => 1; + assert(pos >= 0); memcpy(buffer, "doggodogdog", size); lfs_file_seek(&lfs, &file, pos, LFS_SEEK_SET) => pos;