Added/improved disk-reading debug scripts

Also fixed a bug in dir splitting when there's a large number of open
files, which was the main reason I was trying to make it easier to debug
disk images.

One part of the recent test changes was to move away from the
file-per-block emubd and instead simulate storage with a single
contiguous file. The file-per-block format was marginally useful
at the beginning, but as the remaining bugs get more subtle, it
becomes more useful to inspect littlefs through scripts that
make the underlying metadata more human-readable.

The key benefit of switching to a contiguous file is these same
scripts can be reused for real disk images and can even read through
/dev/sdb or similar.

- ./scripts/readblock.py disk block_size block

  off       data
  00000000: 71 01 00 00 f0 0f ff f7 6c 69 74 74 6c 65 66 73  q.......littlefs
  00000010: 2f e0 00 10 00 00 02 00 00 02 00 00 00 04 00 00  /...............
  00000020: ff 00 00 00 ff ff ff 7f fe 03 00 00 20 00 04 19  ...............
  00000030: 61 00 00 0c 00 62 20 30 0c 09 a0 01 00 00 64 00  a....b 0......d.
  ...

  readblock.py prints a hex dump of a given block on disk. It's basically
  just "dd if=disk bs=block_size count=1 skip=block | xxd -g1 -" but with
  less typing.

- ./scripts/readmdir.py disk block_size block1 block2

  off       tag       type            id  len  data (truncated)
  0000003b: 0020000a  dir              0   10  63 6f 6c 64 63 6f 66 66 coldcoff
  00000049: 20000008  dirstruct        0    8  02 02 00 00 03 02 00 00 ........
  00000008: 00200409  dir              1    9  68 6f 74 63 6f 66 66 65 hotcoffe
  00000015: 20000408  dirstruct        1    8  fe 01 00 00 ff 01 00 00 ........

  readmdir.py prints info about the tags in a metadata pair on disk. It
  can print the currently active tags as well as the raw log of the
  metadata pair.

- ./scripts/readtree.py disk block_size

  superblock "littlefs"
    version v2.0
    block_size 512
    block_count 1024
    name_max 255
    file_max 2147483647
    attr_max 1022
  gstate 0x000000000000000000000000
  dir "/"
  mdir {0x0, 0x1} rev 3
  v id 0 superblock "littlefs" inline size 24
  mdir {0x77, 0x78} rev 1
    id 0 dir "coffee" dir {0x1fc, 0x1fd}
  dir "/coffee"
  mdir {0x1fd, 0x1fc} rev 2
    id 0 dir "coldcoffee" dir {0x202, 0x203}
    id 1 dir "hotcoffee" dir {0x1fe, 0x1ff}
  dir "/coffee/coldcoffee"
  mdir {0x202, 0x203} rev 1
  dir "/coffee/warmcoffee"
  mdir {0x200, 0x201} rev 1

  readtree.py parses the littlefs tree and prints info about the
  semantics of what's on disk. This includes the superblock,
  global-state, and directories/metadata-pairs. It doesn't print
  the filesystem tree though, that could be a different tool.
This commit is contained in:
Christopher Haster
2020-01-18 20:20:43 -06:00
parent fb65057a3c
commit 9453ebd15d
8 changed files with 645 additions and 34 deletions

332
scripts/readmdir.py Executable file
View File

@@ -0,0 +1,332 @@
#!/usr/bin/env python3
import struct
import binascii
import itertools as it
TAG_TYPES = {
'splice': (0x700, 0x400),
'create': (0x7ff, 0x401),
'delete': (0x7ff, 0x4ff),
'name': (0x700, 0x000),
'reg': (0x7ff, 0x001),
'dir': (0x7ff, 0x002),
'superblock': (0x7ff, 0x0ff),
'struct': (0x700, 0x200),
'dirstruct': (0x7ff, 0x200),
'ctzstruct': (0x7ff, 0x202),
'inlinestruct': (0x7ff, 0x201),
'userattr': (0x700, 0x300),
'tail': (0x700, 0x600),
'softtail': (0x7ff, 0x600),
'hardtail': (0x7ff, 0x601),
'gstate': (0x700, 0x700),
'movestate': (0x7ff, 0x7ff),
'crc': (0x700, 0x500),
}
class Tag:
def __init__(self, *args):
if len(args) == 1:
self.tag = args[0]
elif len(args) == 3:
if isinstance(args[0], str):
type = TAG_TYPES[args[0]][1]
else:
type = args[0]
if isinstance(args[1], str):
id = int(args[1], 0) if args[1] not in 'x.' else 0x3ff
else:
id = args[1]
if isinstance(args[2], str):
size = int(args[2], str) if args[2] not in 'x.' else 0x3ff
else:
size = args[2]
self.tag = (type << 20) | (id << 10) | size
else:
assert False
@property
def isvalid(self):
return not bool(self.tag & 0x80000000)
@property
def isattr(self):
return not bool(self.tag & 0x40000000)
@property
def iscompactable(self):
return bool(self.tag & 0x20000000)
@property
def isunique(self):
return not bool(self.tag & 0x10000000)
@property
def type(self):
return (self.tag & 0x7ff00000) >> 20
@property
def type1(self):
return (self.tag & 0x70000000) >> 20
@property
def type3(self):
return (self.tag & 0x7ff00000) >> 20
@property
def id(self):
return (self.tag & 0x000ffc00) >> 10
@property
def size(self):
return (self.tag & 0x000003ff) >> 0
@property
def dsize(self):
return 4 + (self.size if self.size != 0x3ff else 0)
@property
def chunk(self):
return self.type & 0xff
@property
def schunk(self):
return struct.unpack('b', struct.pack('B', self.chunk))[0]
def is_(self, type):
return (self.type & TAG_TYPES[type][0]) == TAG_TYPES[type][1]
def mkmask(self):
return Tag(
0x700 if self.isunique else 0x7ff,
0x3ff if self.isattr else 0,
0)
def typerepr(self):
if self.is_('crc') and getattr(self, 'crc', 0xffffffff) != 0xffffffff:
return 'crc (bad)'
reverse_types = {v: k for k, v in TAG_TYPES.items()}
for prefix in range(12):
mask = 0x7ff & ~((1 << prefix)-1)
if (mask, self.type & mask) in reverse_types:
type = reverse_types[mask, self.type & mask]
if prefix > 0:
return '%s %#0*x' % (
type, prefix//4, self.type & ((1 << prefix)-1))
else:
return type
else:
return '%02x' % self.type
def idrepr(self):
return repr(self.id) if self.id != 0x3ff else '.'
def sizerepr(self):
return repr(self.size) if self.size != 0x3ff else 'x'
def __repr__(self):
return 'Tag(%r, %d, %d)' % (self.typerepr(), self.id, self.size)
def __lt__(self, other):
return (self.id, self.type) < (other.id, other.type)
def __bool__(self):
return self.isvalid
def __int__(self):
return self.tag
def __index__(self):
return self.tag
class MetadataPair:
def __init__(self, blocks):
if len(blocks) > 1:
self.pair = [MetadataPair([block]) for block in blocks]
self.pair = sorted(self.pair, reverse=True)
self.data = self.pair[0].data
self.rev = self.pair[0].rev
self.tags = self.pair[0].tags
self.ids = self.pair[0].ids
self.log = self.pair[0].log
self.all_ = self.pair[0].all_
return
self.pair = [self]
self.data = blocks[0]
block = self.data
self.rev, = struct.unpack('<I', block[0:4])
crc = binascii.crc32(block[0:4])
# parse tags
corrupt = False
tag = Tag(0xffffffff)
off = 4
self.log = []
self.all_ = []
while len(block) - off >= 4:
ntag, = struct.unpack('>I', block[off:off+4])
tag = Tag(int(tag) ^ ntag)
tag.off = off + 4
tag.data = block[off+4:off+tag.dsize]
if tag.is_('crc'):
crc = binascii.crc32(block[off:off+4+4], crc)
else:
crc = binascii.crc32(block[off:off+tag.dsize], crc)
tag.crc = crc
off += tag.dsize
self.all_.append(tag)
if tag.is_('crc'):
# is valid commit?
if crc != 0xffffffff:
corrupt = True
if not corrupt:
self.log = self.all_.copy()
# reset tag parsing
crc = 0
tag = Tag(int(tag) ^ ((tag.type & 1) << 31))
# find most recent tags
self.tags = []
for tag in self.log:
if tag.is_('crc') or tag.is_('splice'):
continue
if tag in self and self[tag] is tag:
self.tags.append(tag)
self.tags = sorted(self.tags)
# and ids
self.ids = list(it.takewhile(
lambda id: Tag('name', id, 0) in self,
it.count()))
def __bool__(self):
return bool(self.log)
def __lt__(self, other):
# corrupt blocks don't count
if not self and other:
return True
# use sequence arithmetic to avoid overflow
return not ((other.rev - self.rev) & 0x80000000)
def __contains__(self, args):
try:
self[args]
return True
except KeyError:
return False
def __getitem__(self, args):
if isinstance(args, tuple):
gmask, gtag = args
else:
gmask, gtag = args.mkmask(), args
gdiff = 0
for tag in reversed(self.log):
if (gmask.id != 0 and tag.is_('splice') and
tag.id <= gtag.id - gdiff):
if tag.is_('create') and tag.id == gtag.id - gdiff:
# creation point
break
gdiff += tag.schunk
if (int(gmask) & int(tag)) == (int(gmask) & int(
Tag(gtag.type, gtag.id - gdiff, gtag.size))):
if tag.size == 0x3ff:
# deleted
break
return tag
raise KeyError(gmask, gtag)
def _dump_tags(self, tags, truncate=True):
sys.stdout.write("%-8s %-8s %-13s %4s %4s %s\n" % (
'off', 'tag', 'type', 'id', 'len',
'data (truncated)' if truncate else 12*' '+'data'))
for tag in tags:
sys.stdout.write("%08x: %08x %-13s %4s %4s" % (
tag.off, tag,
tag.typerepr(), tag.idrepr(), tag.sizerepr()))
if truncate:
sys.stdout.write(" %-23s %-8s\n" % (
' '.join('%02x' % c for c in tag.data[:8]),
''.join(c if c >= ' ' and c <= '~' else '.'
for c in map(chr, tag.data[:8]))))
else:
sys.stdout.write("\n")
for i in range(0, len(tag.data), 16):
sys.stdout.write("%08x: %-47s %-16s\n" % (
tag.off+i,
' '.join('%02x' % c for c in tag.data[i:i+16]),
''.join(c if c >= ' ' and c <= '~' else '.'
for c in map(chr, tag.data[i:i+16]))))
def dump_tags(self, truncate=True):
self._dump_tags(self.tags, truncate=truncate)
def dump_log(self, truncate=True):
self._dump_tags(self.log, truncate=truncate)
def dump_all(self, truncate=True):
self._dump_tags(self.all_, truncate=truncate)
def main(args):
blocks = []
with open(args.disk, 'rb') as f:
for block in [args.block1, args.block2]:
if block is None:
continue
f.seek(block * args.block_size)
blocks.append(f.read(args.block_size)
.ljust(args.block_size, b'\xff'))
# find most recent pair
mdir = MetadataPair(blocks)
if args.all:
mdir.dump_all(truncate=not args.no_truncate)
elif args.log:
mdir.dump_log(truncate=not args.no_truncate)
else:
mdir.dump_tags(truncate=not args.no_truncate)
return 0 if mdir else 1
if __name__ == "__main__":
import argparse
import sys
parser = argparse.ArgumentParser(
description="Dump useful info about metadata pairs in littlefs.")
parser.add_argument('disk',
help="File representing the block device.")
parser.add_argument('block_size', type=lambda x: int(x, 0),
help="Size of a block in bytes.")
parser.add_argument('block1', type=lambda x: int(x, 0),
help="First block address for finding the metadata pair.")
parser.add_argument('block2', nargs='?', type=lambda x: int(x, 0),
help="Second block address for finding the metadata pair.")
parser.add_argument('-a', '--all', action='store_true',
help="Show all tags in log, included tags in corrupted commits.")
parser.add_argument('-l', '--log', action='store_true',
help="Show tags in log.")
parser.add_argument('-T', '--no-truncate', action='store_true',
help="Don't truncate large amounts of data in tags.")
sys.exit(main(parser.parse_args()))