Added/improved disk-reading debug scripts

Also fixed a bug in dir splitting when there's a large number of open files, which was the main reason I was trying to make it easier to debug disk images.

One part of the recent test changes was to move away from the file-per-block emubd and instead simulate storage with a single contiguous file. The file-per-block format was marginally useful at the beginning, but as the remaining bugs get more subtle, it becomes more useful to inspect littlefs through scripts that make the underlying metadata more human-readable.

The key benefit of switching to a contiguous file is that these same scripts can be reused for real disk images and can even read through /dev/sdb or similar.

- ./scripts/readblock.py disk block_size block

  off       data
  00000000: 71 01 00 00 f0 0f ff f7 6c 69 74 74 6c 65 66 73  q.......littlefs
  00000010: 2f e0 00 10 00 00 02 00 00 02 00 00 00 04 00 00  /...............
  00000020: ff 00 00 00 ff ff ff 7f fe 03 00 00 20 00 04 19  ............ ...
  00000030: 61 00 00 0c 00 62 20 30 0c 09 a0 01 00 00 64 00  a....b 0......d.
  ...

  readblock.py prints a hex dump of a given block on disk. It's basically just "dd if=disk bs=block_size count=1 skip=block | xxd -g1 -" but with less typing.

- ./scripts/readmdir.py disk block_size block1 block2

  off       tag       type            id  len  data (truncated)
  0000003b: 0020000a  dir              0   10  63 6f 6c 64 63 6f 66 66  coldcoff
  00000049: 20000008  dirstruct        0    8  02 02 00 00 03 02 00 00  ........
  00000008: 00200409  dir              1    9  68 6f 74 63 6f 66 66 65  hotcoffe
  00000015: 20000408  dirstruct        1    8  fe 01 00 00 ff 01 00 00  ........

  readmdir.py prints info about the tags in a metadata pair on disk. It can print the currently active tags as well as the raw log of the metadata pair.
- ./scripts/readtree.py disk block_size

  superblock "littlefs"
    version v2.0
    block_size 512
    block_count 1024
    name_max 255
    file_max 2147483647
    attr_max 1022
  gstate 0x000000000000000000000000
  dir "/"
  mdir {0x0, 0x1} rev 3
  v id 0 superblock "littlefs" inline size 24
  mdir {0x77, 0x78} rev 1
    id 0 dir "coffee" dir {0x1fc, 0x1fd}
  dir "/coffee"
  mdir {0x1fd, 0x1fc} rev 2
    id 0 dir "coldcoffee" dir {0x202, 0x203}
    id 1 dir "hotcoffee" dir {0x1fe, 0x1ff}
  dir "/coffee/coldcoffee"
  mdir {0x202, 0x203} rev 1
  dir "/coffee/warmcoffee"
  mdir {0x200, 0x201} rev 1

  readtree.py parses the littlefs tree and prints info about the semantics of what's on disk. This includes the superblock, global-state, and directories/metadata-pairs. It doesn't print the filesystem tree, though; that could be a different tool.
2025-10-31 08:42:40 +01:00 · 2020-01-18 20:20:43 -06:00
parent fb65057a3c
commit 9453ebd15d
8 changed files with 645 additions and 34 deletions
--- a/scripts/readmdir.py
+++ b/scripts/readmdir.py
@@ -0,0 +1,332 @@
+#!/usr/bin/env python3
+
+import binascii
+import itertools as it
+import struct
+import sys
+
+# Known tag types, keyed by name. Each entry is a (mask, value) pair: a tag's
+# 11-bit type matches the entry when (type & mask) == value (see Tag.is_).
+# Entries with mask 0x700 compare only the top three type bits, so they match
+# every type in that group (for example 'name' also matches 'reg', 'dir' and
+# 'superblock'); entries with mask 0x7ff match one exact type.
+TAG_TYPES = {
+    'splice':       (0x700, 0x400),
+    'create':       (0x7ff, 0x401),
+    'delete':       (0x7ff, 0x4ff),
+    'name':         (0x700, 0x000),
+    'reg':          (0x7ff, 0x001),
+    'dir':          (0x7ff, 0x002),
+    'superblock':   (0x7ff, 0x0ff),
+    'struct':       (0x700, 0x200),
+    'dirstruct':    (0x7ff, 0x200),
+    'ctzstruct':    (0x7ff, 0x202),
+    'inlinestruct': (0x7ff, 0x201),
+    'userattr':     (0x700, 0x300),
+    'tail':         (0x700, 0x600),
+    'softtail':     (0x7ff, 0x600),
+    'hardtail':     (0x7ff, 0x601),
+    'gstate':       (0x700, 0x700),
+    'movestate':    (0x7ff, 0x7ff),
+    'crc':          (0x700, 0x500),
+}
+
+class Tag:
+    """A 32-bit littlefs metadata tag.
+
+    The tag word is laid out as follows (most significant bit first):
+
+        bit  31     invalid flag (set means the tag is not valid)
+        bits 30-20  type (11 bits)
+        bits 19-10  id (10 bits)
+        bits  9-0   size (10 bits)
+
+    An id of 0x3ff is rendered as '.' and a size of 0x3ff as 'x'; a size of
+    0x3ff also means the tag carries no data (see dsize).
+
+    Construct either from a raw tag word, ``Tag(word)``, or from its fields,
+    ``Tag(type, id, size)``. In the three-argument form each field may be an
+    int or a string: type strings are looked up in TAG_TYPES, id/size strings
+    are parsed with ``int(s, 0)`` (so '12' and '0xc' both work), and 'x' or
+    '.' stand for the all-ones value 0x3ff.
+    """
+
+    def __init__(self, *args):
+        if len(args) == 1:
+            self.tag = args[0]
+        elif len(args) == 3:
+            type_, id_, size = args
+
+            if isinstance(type_, str):
+                type_ = TAG_TYPES[type_][1]
+
+            if isinstance(id_, str):
+                id_ = int(id_, 0) if id_ not in 'x.' else 0x3ff
+
+            if isinstance(size, str):
+                # base 0 auto-detects the radix, exactly like the id field
+                size = int(size, 0) if size not in 'x.' else 0x3ff
+
+            self.tag = (type_ << 20) | (id_ << 10) | size
+        else:
+            assert False, "Tag takes either 1 or 3 arguments"
+
+    @property
+    def isvalid(self):
+        """True when the top bit (0x80000000) is clear."""
+        return not bool(self.tag & 0x80000000)
+
+    @property
+    def isattr(self):
+        """True when bit 30 (0x40000000) is clear."""
+        return not bool(self.tag & 0x40000000)
+
+    @property
+    def iscompactable(self):
+        """True when bit 29 (0x20000000) is set."""
+        return bool(self.tag & 0x20000000)
+
+    @property
+    def isunique(self):
+        """True when bit 28 (0x10000000) is clear."""
+        return not bool(self.tag & 0x10000000)
+
+    @property
+    def type(self):
+        """The full 11-bit type field."""
+        return (self.tag & 0x7ff00000) >> 20
+
+    @property
+    def type1(self):
+        """The top 3 bits of the type, kept in place (low 8 bits zeroed)."""
+        return (self.tag & 0x70000000) >> 20
+
+    @property
+    def type3(self):
+        """The full 11-bit type field (same value as ``type``)."""
+        return (self.tag & 0x7ff00000) >> 20
+
+    @property
+    def id(self):
+        """The 10-bit id field."""
+        return (self.tag & 0x000ffc00) >> 10
+
+    @property
+    def size(self):
+        """The 10-bit size field."""
+        return (self.tag & 0x000003ff) >> 0
+
+    @property
+    def dsize(self):
+        """Bytes occupied on disk: 4 for the tag word plus ``size`` bytes of
+        data, where a size of 0x3ff counts as no data."""
+        return 4 + (self.size if self.size != 0x3ff else 0)
+
+    @property
+    def chunk(self):
+        """The low 8 bits of the type."""
+        return self.type & 0xff
+
+    @property
+    def schunk(self):
+        """``chunk`` reinterpreted as a signed 8-bit integer."""
+        return struct.unpack('b', struct.pack('B', self.chunk))[0]
+
+    def is_(self, type):
+        """Return True if this tag's type matches the named TAG_TYPES entry,
+        i.e. (type & mask) == value. Raises KeyError for an unknown name."""
+        return (self.type & TAG_TYPES[type][0]) == TAG_TYPES[type][1]
+
+    def mkmask(self):
+        """Build the mask Tag used when this tag is looked up in a
+        MetadataPair: type mask 0x700 if ``isunique`` else 0x7ff, id mask
+        0x3ff if ``isattr`` else 0, and no size bits."""
+        return Tag(
+            0x700 if self.isunique else 0x7ff,
+            0x3ff if self.isattr else 0,
+            0)
+
+    def typerepr(self):
+        """Return a human-readable name for the type.
+
+        A crc tag that carries a ``crc`` attribute other than 0xffffffff is
+        reported as 'crc (bad)'. Otherwise TAG_TYPES is searched from the
+        most specific mask to the least specific; when only a prefix of the
+        type matches, the unmatched low bits are appended in hex. Types that
+        match nothing are printed as plain hex.
+        """
+        if self.is_('crc') and getattr(self, 'crc', 0xffffffff) != 0xffffffff:
+            return 'crc (bad)'
+
+        reverse_types = {v: k for k, v in TAG_TYPES.items()}
+        for prefix in range(12):
+            # ignore the low `prefix` bits of the type on each round
+            mask = 0x7ff & ~((1 << prefix)-1)
+            if (mask, self.type & mask) in reverse_types:
+                name = reverse_types[mask, self.type & mask]
+                if prefix > 0:
+                    return '%s %#0*x' % (
+                        name, prefix//4, self.type & ((1 << prefix)-1))
+                else:
+                    return name
+
+        return '%02x' % self.type
+
+    def idrepr(self):
+        """Return the id as text, with 0x3ff shown as '.'."""
+        return repr(self.id) if self.id != 0x3ff else '.'
+
+    def sizerepr(self):
+        """Return the size as text, with 0x3ff shown as 'x'."""
+        return repr(self.size) if self.size != 0x3ff else 'x'
+
+    def __repr__(self):
+        return 'Tag(%r, %d, %d)' % (self.typerepr(), self.id, self.size)
+
+    def __lt__(self, other):
+        # order by id first, then by type
+        return (self.id, self.type) < (other.id, other.type)
+
+    def __bool__(self):
+        # a tag is truthy exactly when it is valid
+        return self.isvalid
+
+    def __int__(self):
+        return self.tag
+
+    def __index__(self):
+        # lets a Tag be used directly with '%x' formatting and bit operations
+        return self.tag
+
+class MetadataPair:
+    """Parsed view of a littlefs metadata pair.
+
+    Built from a list of raw block images (bytes). With a single block, the
+    block's commit log is parsed directly. With several blocks, each one is
+    parsed on its own and the attributes of the most recent non-corrupt block
+    (by revision count) are exposed.
+
+    Attributes:
+        pair: the per-block MetadataPair objects, most recent first
+        data: raw bytes of the active block
+        rev:  revision count read from the first 4 bytes of the block
+        all_: every tag parsed from the block, valid or not
+        log:  the tags up to the last commit whose checksum matched, taken
+              before any corrupt commit was seen (empty if there is none)
+        tags: the currently active tags, sorted by (id, type)
+        ids:  consecutive ids, counting from 0, that have a name tag
+
+    A MetadataPair is falsy when its log is empty, i.e. when no valid commit
+    was found.
+    """
+
+    def __init__(self, blocks):
+        if len(blocks) > 1:
+            # parse every block on its own, then adopt the newest good one
+            self.pair = [MetadataPair([block]) for block in blocks]
+            self.pair = sorted(self.pair, reverse=True)
+
+            self.data = self.pair[0].data
+            self.rev  = self.pair[0].rev
+            self.tags = self.pair[0].tags
+            self.ids  = self.pair[0].ids
+            self.log  = self.pair[0].log
+            self.all_ = self.pair[0].all_
+            return
+
+        self.pair = [self]
+        self.data = blocks[0]
+        block = self.data
+
+        # the block starts with a little-endian revision count, which also
+        # seeds the running checksum
+        self.rev, = struct.unpack('<I', block[0:4])
+        crc = binascii.crc32(block[0:4])
+
+        # parse tags
+        corrupt = False
+        tag = Tag(0xffffffff)
+        off = 4
+        self.log = []
+        self.all_ = []
+        while len(block) - off >= 4:
+            # tags are stored big-endian and xored with the previous tag
+            ntag, = struct.unpack('>I', block[off:off+4])
+
+            tag = Tag(int(tag) ^ ntag)
+            tag.off = off + 4
+            tag.data = block[off+4:off+tag.dsize]
+            if tag.is_('crc'):
+                # only the tag word and the 4 bytes after it are checksummed
+                crc = binascii.crc32(block[off:off+4+4], crc)
+            else:
+                crc = binascii.crc32(block[off:off+tag.dsize], crc)
+            tag.crc = crc
+            off += tag.dsize
+
+            self.all_.append(tag)
+
+            if tag.is_('crc'):
+                # is valid commit?
+                if crc != 0xffffffff:
+                    corrupt = True
+                if not corrupt:
+                    self.log = self.all_.copy()
+
+                # reset tag parsing; the low bit of the crc tag's type is
+                # xored into the top bit of the next tag's xor base
+                crc = 0
+                tag = Tag(int(tag) ^ ((tag.type & 1) << 31))
+
+        # find most recent tags
+        self.tags = []
+        for tag in self.log:
+            if tag.is_('crc') or tag.is_('splice'):
+                continue
+
+            # keep a tag only if a lookup for it resolves to this very
+            # object, i.e. it has not been superseded or deleted later on
+            if tag in self and self[tag] is tag:
+                self.tags.append(tag)
+
+        self.tags = sorted(self.tags)
+
+        # and ids
+        self.ids = list(it.takewhile(
+            lambda id: Tag('name', id, 0) in self,
+            it.count()))
+
+    def __bool__(self):
+        return bool(self.log)
+
+    def __lt__(self, other):
+        """Order blocks so that the most recent valid one compares greatest."""
+        # corrupt blocks don't count: a corrupt block sorts below any valid
+        # block, and a valid block never sorts below a corrupt one, whatever
+        # their revision counts say
+        if not self or not other:
+            return bool(other)
+
+        # use sequence arithmetic to avoid overflow
+        return not ((other.rev - self.rev) & 0x80000000)
+
+    def __contains__(self, args):
+        try:
+            self[args]
+            return True
+        except KeyError:
+            return False
+
+    def __getitem__(self, args):
+        """Look up the most recent tag matching ``args`` in the log.
+
+        ``args`` is either a Tag (the mask is derived with Tag.mkmask) or a
+        ``(mask, tag)`` tuple of Tags. Raises KeyError when no matching tag
+        exists, when the match is marked deleted (size 0x3ff), or when the
+        search reaches the point where the id was created.
+        """
+        if isinstance(args, tuple):
+            gmask, gtag = args
+        else:
+            gmask, gtag = args.mkmask(), args
+
+        # walk the log newest-first; gdiff tracks how far splices at or
+        # below the wanted id have shifted that id since the older entries
+        gdiff = 0
+        for tag in reversed(self.log):
+            if (gmask.id != 0 and tag.is_('splice') and
+                    tag.id <= gtag.id - gdiff):
+                if tag.is_('create') and tag.id == gtag.id - gdiff:
+                    # creation point
+                    break
+
+                gdiff += tag.schunk
+
+            if (int(gmask) & int(tag)) == (int(gmask) & int(
+                    Tag(gtag.type, gtag.id - gdiff, gtag.size))):
+                if tag.size == 0x3ff:
+                    # deleted
+                    break
+
+                return tag
+
+        raise KeyError(gmask, gtag)
+
+    def _dump_tags(self, tags, truncate=True):
+        """Write a table of ``tags`` to stdout.
+
+        Each row shows offset, raw tag word, type, id and size. With
+        ``truncate`` the first 8 data bytes follow on the same line;
+        otherwise the full data is hex-dumped below the row, 16 bytes per
+        line.
+        """
+        sys.stdout.write("%-8s  %-8s  %-13s %4s %4s  %s\n" % (
+            'off', 'tag', 'type', 'id', 'len',
+            'data (truncated)' if truncate else 12*' '+'data'))
+
+        for tag in tags:
+            sys.stdout.write("%08x: %08x  %-13s %4s %4s" % (
+                tag.off, tag,
+                tag.typerepr(), tag.idrepr(), tag.sizerepr()))
+            if truncate:
+                # hex bytes, then the same bytes as printable ASCII
+                sys.stdout.write("  %-23s  %-8s\n" % (
+                    ' '.join('%02x' % c for c in tag.data[:8]),
+                    ''.join(c if c >= ' ' and c <= '~' else '.'
+                        for c in map(chr, tag.data[:8]))))
+            else:
+                sys.stdout.write("\n")
+                for i in range(0, len(tag.data), 16):
+                    sys.stdout.write("%08x: %-47s  %-16s\n" % (
+                        tag.off+i,
+                        ' '.join('%02x' % c for c in tag.data[i:i+16]),
+                        ''.join(c if c >= ' ' and c <= '~' else '.'
+                            for c in map(chr, tag.data[i:i+16]))))
+
+    def dump_tags(self, truncate=True):
+        """Print the currently active tags."""
+        self._dump_tags(self.tags, truncate=truncate)
+
+    def dump_log(self, truncate=True):
+        """Print every tag in the valid part of the log."""
+        self._dump_tags(self.log, truncate=truncate)
+
+    def dump_all(self, truncate=True):
+        """Print every parsed tag, including those in corrupt commits."""
+        self._dump_tags(self.all_, truncate=truncate)
+
+def main(args):
+    """Read the requested block(s) from the disk image and dump their tags.
+
+    ``args`` is the parsed command line. Returns 0 when the metadata pair
+    contains a valid log and 1 otherwise, for use as the exit status.
+    """
+    # block2 is optional on the command line, so drop it when absent
+    addrs = [addr for addr in (args.block1, args.block2) if addr is not None]
+
+    blocks = []
+    with open(args.disk, 'rb') as disk:
+        for addr in addrs:
+            disk.seek(addr * args.block_size)
+            raw = disk.read(args.block_size)
+            # pad short reads with 0xff up to a full block
+            blocks.append(raw.ljust(args.block_size, b'\xff'))
+
+    # find most recent pair
+    mdir = MetadataPair(blocks)
+
+    if args.all:
+        dump = mdir.dump_all
+    elif args.log:
+        dump = mdir.dump_log
+    else:
+        dump = mdir.dump_tags
+    dump(truncate=not args.no_truncate)
+
+    if mdir:
+        return 0
+    return 1
+
+if __name__ == "__main__":
+    import argparse
+    import sys
+
+    # base 0 lets int() accept decimal as well as 0x/0o/0b prefixed numbers;
+    # kept as a lambda so argparse reports errors exactly as before
+    auto_int = lambda x: int(x, 0)
+
+    parser = argparse.ArgumentParser(
+        description="Dump useful info about metadata pairs in littlefs.")
+
+    # positional arguments
+    parser.add_argument(
+        'disk',
+        help="File representing the block device.")
+    parser.add_argument(
+        'block_size', type=auto_int,
+        help="Size of a block in bytes.")
+    parser.add_argument(
+        'block1', type=auto_int,
+        help="First block address for finding the metadata pair.")
+    parser.add_argument(
+        'block2', nargs='?', type=auto_int,
+        help="Second block address for finding the metadata pair.")
+
+    # output selection
+    parser.add_argument(
+        '-a', '--all', action='store_true',
+        help="Show all tags in log, included tags in corrupted commits.")
+    parser.add_argument(
+        '-l', '--log', action='store_true',
+        help="Show tags in log.")
+    parser.add_argument(
+        '-T', '--no-truncate', action='store_true',
+        help="Don't truncate large amounts of data in tags.")
+
+    cli_args = parser.parse_args()
+    sys.exit(main(cli_args))