Added allocation randomization for dynamic wear-leveling

This implements the second step of full dynamic wear-leveling, block
allocation randomization. This is the key part that uniformly distributes
wear across the filesystem, even through reboots.

The entropy actually comes from the filesystem itself, by xoring
together all of the CRCs in the metadata-pairs on the filesystem. While
this sounds like a ridiculous operation, it's easy to do when we already
scan the metadata-pairs at mount time.

This gives us a random number we can use for block allocation.
Unfortunately it's not a great general-purpose random generator, as the
output only changes on every filesystem write. Fortunately that's exactly
when we need our allocator.

---

Additionally, the randomization created a mess for the testing
framework. Fortunately, this method of randomization is deterministic.
A very useful property for reproducing bugs.
This commit is contained in:
Christopher Haster
2018-08-09 09:06:17 -05:00
parent e4a0d586d5
commit 126ef8b07f
10 changed files with 250 additions and 59 deletions

View File

@@ -19,6 +19,40 @@
#include <inttypes.h> #include <inttypes.h>
// Emulated block device utils
// Convert a 64-bit value from host byte order to little-endian.
// Done bytewise so it is correct regardless of host endianness.
static inline uint64_t lfs_emubd_tole64(uint64_t a) {
uint8_t le[sizeof(a)];
for (size_t i = 0; i < sizeof(le); i++) {
le[i] = (uint8_t)(a >> (8*i));
}
memcpy(&a, le, sizeof(a));
return a;
}
// Convert every persisted field of the emulated block device (cfg, stats
// and history) from host byte order to little-endian, in place.
static inline void lfs_emubd_tole32(lfs_emubd_t *emu) {
emu->cfg.read_size = lfs_tole32(emu->cfg.read_size);
emu->cfg.prog_size = lfs_tole32(emu->cfg.prog_size);
emu->cfg.block_size = lfs_tole32(emu->cfg.block_size);
emu->cfg.block_count = lfs_tole32(emu->cfg.block_count);
// The stats counters are 64-bit (erase_count is declared uint64_t and the
// stats file is three 64-bit words); a 32-bit conversion would drop the
// upper half of each counter.
// NOTE(review): confirm read_count/prog_count are uint64_t as well.
emu->stats.read_count = lfs_emubd_tole64(emu->stats.read_count);
emu->stats.prog_count = lfs_emubd_tole64(emu->stats.prog_count);
emu->stats.erase_count = lfs_emubd_tole64(emu->stats.erase_count);
// size_t index avoids a signed/unsigned comparison against sizeof
for (size_t i = 0; i < sizeof(emu->history.blocks) /
sizeof(emu->history.blocks[0]); i++) {
emu->history.blocks[i] = lfs_tole32(emu->history.blocks[i]);
}
}
// Convert a 64-bit value from little-endian to host byte order.
// Done bytewise so it is correct regardless of host endianness.
static inline uint64_t lfs_emubd_fromle64(uint64_t a) {
uint8_t le[sizeof(a)];
memcpy(le, &a, sizeof(a));
uint64_t host = 0;
for (size_t i = 0; i < sizeof(le); i++) {
host |= (uint64_t)le[i] << (8*i);
}
return host;
}
// Convert every persisted field of the emulated block device (cfg, stats
// and history) from little-endian to host byte order, in place.
static inline void lfs_emubd_fromle32(lfs_emubd_t *emu) {
emu->cfg.read_size = lfs_fromle32(emu->cfg.read_size);
emu->cfg.prog_size = lfs_fromle32(emu->cfg.prog_size);
emu->cfg.block_size = lfs_fromle32(emu->cfg.block_size);
emu->cfg.block_count = lfs_fromle32(emu->cfg.block_count);
// The stats counters are 64-bit (erase_count is declared uint64_t and the
// stats file is three 64-bit words); a 32-bit conversion would drop the
// upper half of each counter.
// NOTE(review): confirm read_count/prog_count are uint64_t as well.
emu->stats.read_count = lfs_emubd_fromle64(emu->stats.read_count);
emu->stats.prog_count = lfs_emubd_fromle64(emu->stats.prog_count);
emu->stats.erase_count = lfs_emubd_fromle64(emu->stats.erase_count);
// size_t index avoids a signed/unsigned comparison against sizeof
for (size_t i = 0; i < sizeof(emu->history.blocks) /
sizeof(emu->history.blocks[0]); i++) {
emu->history.blocks[i] = lfs_fromle32(emu->history.blocks[i]);
}
}
// Block device emulated on existing filesystem // Block device emulated on existing filesystem
int lfs_emubd_create(const struct lfs_config *cfg, const char *path) { int lfs_emubd_create(const struct lfs_config *cfg, const char *path) {
lfs_emubd_t *emu = cfg->context; lfs_emubd_t *emu = cfg->context;
@@ -46,13 +80,13 @@ int lfs_emubd_create(const struct lfs_config *cfg, const char *path) {
} }
// Load stats to continue incrementing // Load stats to continue incrementing
snprintf(emu->child, LFS_NAME_MAX, "stats"); snprintf(emu->child, LFS_NAME_MAX, ".stats");
FILE *f = fopen(emu->path, "r"); FILE *f = fopen(emu->path, "r");
if (!f) { if (!f) {
return -errno; memset(&emu->stats, 0, sizeof(emu->stats));
} } else {
size_t res = fread(&emu->stats, sizeof(emu->stats), 1, f); size_t res = fread(&emu->stats, sizeof(emu->stats), 1, f);
lfs_emubd_fromle32(emu);
if (res < 1) { if (res < 1) {
return -errno; return -errno;
} }
@@ -61,6 +95,25 @@ int lfs_emubd_create(const struct lfs_config *cfg, const char *path) {
if (err) { if (err) {
return -errno; return -errno;
} }
}
// Load history
snprintf(emu->child, LFS_NAME_MAX, ".history");
f = fopen(emu->path, "r");
if (!f) {
memset(&emu->history, 0, sizeof(emu->history));
} else {
size_t res = fread(&emu->history, sizeof(emu->history), 1, f);
lfs_emubd_fromle32(emu);
if (res < 1) {
return -errno;
}
err = fclose(f);
if (err) {
return -errno;
}
}
return 0; return 0;
} }
@@ -161,6 +214,13 @@ int lfs_emubd_prog(const struct lfs_config *cfg, lfs_block_t block,
return -errno; return -errno;
} }
// update history and stats
if (block != emu->history.blocks[0]) {
memcpy(&emu->history.blocks[1], &emu->history.blocks[0],
sizeof(emu->history) - sizeof(emu->history.blocks[0]));
emu->history.blocks[0] = block;
}
emu->stats.prog_count += 1; emu->stats.prog_count += 1;
return 0; return 0;
} }
@@ -206,13 +266,15 @@ int lfs_emubd_sync(const struct lfs_config *cfg) {
lfs_emubd_t *emu = cfg->context; lfs_emubd_t *emu = cfg->context;
// Just write out info/stats for later lookup // Just write out info/stats for later lookup
snprintf(emu->child, LFS_NAME_MAX, "config"); snprintf(emu->child, LFS_NAME_MAX, ".config");
FILE *f = fopen(emu->path, "w"); FILE *f = fopen(emu->path, "w");
if (!f) { if (!f) {
return -errno; return -errno;
} }
lfs_emubd_tole32(emu);
size_t res = fwrite(&emu->cfg, sizeof(emu->cfg), 1, f); size_t res = fwrite(&emu->cfg, sizeof(emu->cfg), 1, f);
lfs_emubd_fromle32(emu);
if (res < 1) { if (res < 1) {
return -errno; return -errno;
} }
@@ -222,13 +284,33 @@ int lfs_emubd_sync(const struct lfs_config *cfg) {
return -errno; return -errno;
} }
snprintf(emu->child, LFS_NAME_MAX, "stats"); snprintf(emu->child, LFS_NAME_MAX, ".stats");
f = fopen(emu->path, "w"); f = fopen(emu->path, "w");
if (!f) { if (!f) {
return -errno; return -errno;
} }
lfs_emubd_tole32(emu);
res = fwrite(&emu->stats, sizeof(emu->stats), 1, f); res = fwrite(&emu->stats, sizeof(emu->stats), 1, f);
lfs_emubd_fromle32(emu);
if (res < 1) {
return -errno;
}
err = fclose(f);
if (err) {
return -errno;
}
snprintf(emu->child, LFS_NAME_MAX, ".history");
f = fopen(emu->path, "w");
if (!f) {
return -errno;
}
lfs_emubd_tole32(emu);
res = fwrite(&emu->history, sizeof(emu->history), 1, f);
lfs_emubd_fromle32(emu);
if (res < 1) { if (res < 1) {
return -errno; return -errno;
} }

View File

@@ -45,6 +45,10 @@ typedef struct lfs_emubd {
uint64_t erase_count; uint64_t erase_count;
} stats; } stats;
struct {
lfs_block_t blocks[4];
} history;
struct { struct {
uint32_t read_size; uint32_t read_size;
uint32_t prog_size; uint32_t prog_size;

15
lfs.c
View File

@@ -879,6 +879,8 @@ static int32_t lfs_dir_fetchmatch(lfs_t *lfs,
dir->tail[1] = temptail[1]; dir->tail[1] = temptail[1];
dir->split = tempsplit; dir->split = tempsplit;
dir->locals = templocals; dir->locals = templocals;
lfs->seed ^= crc;
crc = 0xffffffff; crc = 0xffffffff;
} else { } else {
err = lfs_bd_crc32(lfs, dir->pair[0], err = lfs_bd_crc32(lfs, dir->pair[0],
@@ -2874,6 +2876,7 @@ static int lfs_init(lfs_t *lfs, const struct lfs_config *cfg) {
lfs->root[0] = 0xffffffff; lfs->root[0] = 0xffffffff;
lfs->root[1] = 0xffffffff; lfs->root[1] = 0xffffffff;
lfs->mlist = NULL; lfs->mlist = NULL;
lfs->seed = 0;
lfs->globals.s.movepair[0] = 0xffffffff; lfs->globals.s.movepair[0] = 0xffffffff;
lfs->globals.s.movepair[1] = 0xffffffff; lfs->globals.s.movepair[1] = 0xffffffff;
lfs->globals.s.moveid = 0x3ff; lfs->globals.s.moveid = 0x3ff;
@@ -2962,12 +2965,6 @@ int lfs_mount(lfs_t *lfs, const struct lfs_config *cfg) {
return err; return err;
} }
// setup free lookahead
lfs->free.off = 0;
lfs->free.size = 0;
lfs->free.i = 0;
lfs_alloc_ack(lfs);
// load superblock // load superblock
lfs_mdir_t root; lfs_mdir_t root;
err = lfs_dir_fetch(lfs, &root, (const lfs_block_t[2]){0, 1}); err = lfs_dir_fetch(lfs, &root, (const lfs_block_t[2]){0, 1});
@@ -3065,6 +3062,12 @@ int lfs_mount(lfs_t *lfs, const struct lfs_config *cfg) {
lfs->globals.s.moveid); lfs->globals.s.moveid);
} }
// setup free lookahead
lfs->free.off = lfs->seed % lfs->cfg->block_size;
lfs->free.size = 0;
lfs->free.i = 0;
lfs_alloc_ack(lfs);
return 0; return 0;
cleanup: cleanup:

1
lfs.h
View File

@@ -382,6 +382,7 @@ typedef struct lfs {
lfs_block_t root[2]; lfs_block_t root[2];
lfs_mlist_t *mlist; lfs_mlist_t *mlist;
uint32_t seed;
lfs_global_t globals; lfs_global_t globals;
lfs_global_t locals; lfs_global_t locals;

View File

@@ -3,22 +3,12 @@
import struct import struct
import sys import sys
import os import os
import argparse
def main(*paths): def corrupt(block):
# find most recent block with open(block, 'r+b') as file:
file = None # skip rev
rev = None file.read(4)
for path in paths:
try:
nfile = open(path, 'r+b')
nrev, = struct.unpack('<I', nfile.read(4))
assert rev != nrev
if not file or ((rev - nrev) & 0x80000000):
file = nfile
rev = nrev
except IOError:
pass
# go to last commit # go to last commit
tag = 0 tag = 0
@@ -35,5 +25,19 @@ def main(*paths):
file.seek(-((tag & 0xfff) + 3), os.SEEK_CUR) file.seek(-((tag & 0xfff) + 3), os.SEEK_CUR)
file.truncate() file.truncate()
def main(args):
    """Corrupt the requested block files.

    When -n is given, or when no block paths were passed, the targets are
    read from 'blocks/.history' (one little-endian 32-bit block number per
    entry) and appended to args.blocks before corrupting.
    """
    from_history = args.n or not args.blocks
    if from_history:
        with open('blocks/.history', 'rb') as history:
            count = int(args.n or 1)
            for _ in range(count):
                (block_id,) = struct.unpack('<I', history.read(4))
                args.blocks.append('blocks/%x' % block_id)

    for path in args.blocks:
        print('corrupting %s' % path)
        corrupt(path)
if __name__ == "__main__": if __name__ == "__main__":
main(*sys.argv[1:]) parser = argparse.ArgumentParser()
parser.add_argument('-n')
parser.add_argument('blocks', nargs='*')
main(parser.parse_args())

98
tests/debug.py Executable file
View File

@@ -0,0 +1,98 @@
#!/usr/bin/env python2
import struct
import binascii
# Known tag types, keyed by (mask, value): a 9-bit type matches an entry
# when (type & mask) == value.
TYPES = {
    (0x1ff, 0x001): 'reg',
    (0x1ff, 0x002): 'dir',
    (0x1ff, 0x011): 'superblock',
    (0x1ff, 0x012): 'root',
    (0x1ff, 0x030): 'delete',
    (0x1f0, 0x080): 'globals',
    (0x1ff, 0x0c0): 'tail soft',
    (0x1ff, 0x0c1): 'tail hard',
    (0x1ff, 0x0f0): 'crc',
    (0x1ff, 0x040): 'struct dir',
    (0x1ff, 0x041): 'struct inline',
    (0x1ff, 0x042): 'struct ctz',
    (0x100, 0x100): 'attr',
}

def typeof(type):
    """Return a human-readable name for a 9-bit tag type.

    Progressively ignores more low bits (0 through 8) until the remaining
    high bits match an entry in TYPES. The ignored low bits, if any, are
    appended in hex as ' [..]'. Unknown types are rendered as '[xx]'.
    """
    for prefix in range(9):
        low_bits = (1 << prefix) - 1
        mask = 0x1ff & ~low_bits
        key = (mask, type & mask)
        if key in TYPES:
            if not prefix:
                return TYPES[key]
            # floor division keeps the '*' field width an integer on both
            # Python 2 and Python 3
            return TYPES[key] + ' [%0*x]' % (prefix // 4, type & low_bits)
    return '[%02x]' % type
def main(*blocks):
    """Dump the tags stored in the most recent of the given block files.

    Each path in blocks is opened and its leading 32-bit little-endian
    revision word is read; the block with the newest revision is then walked
    tag by tag, printing one line per tag and flagging crc tags whose running
    checksum does not check out.
    """
    # find most recent block
    file = None
    rev = None
    crc = None
    versions = []
    for block in blocks:
        try:
            nfile = open(block, 'rb')
            ndata = nfile.read(4)
            # the running crc starts with the revision word itself
            ncrc = binascii.crc32(ndata)
            nrev, = struct.unpack('<I', ndata)
            # two candidate blocks must not report the same revision
            assert rev != nrev
            # take the first readable block unconditionally; afterwards
            # switch only when (rev - nrev) has bit 31 set, i.e. nrev is
            # ahead of rev under 32-bit wraparound comparison
            if not file or ((rev - nrev) & 0x80000000):
                file = nfile
                rev = nrev
                crc = ncrc
            versions.append((nrev, '%s (rev %d)' % (block, nrev)))
        except IOError:
            # unreadable blocks are silently ignored
            pass
    # NOTE(review): if no block could be opened, file is still None here and
    # the read loop below will fail -- confirm callers always pass a valid block
    print "--- %s ---" % ', '.join(v for _,v in sorted(versions, reverse=True))
    # go through each tag, print useful information
    print "%-4s %-8s %-14s %3s %3s %s" % (
        'off', 'tag', 'type', 'id', 'len', 'dump')
    tag = 0
    off = 4
    while True:
        try:
            data = file.read(4)
            crc = binascii.crc32(data, crc)
            ntag, = struct.unpack('<I', data)
        except struct.error:
            # fewer than 4 bytes left: end of block
            break
        # each stored word is xored with the previous tag to get the tag
        tag ^= ntag
        off += 4
        # split the tag into its bit fields: type (bits 22-30),
        # id (bits 12-21) and size (bits 0-11)
        type = (tag & 0x7fc00000) >> 22
        id = (tag & 0x003ff000) >> 12
        size = (tag & 0x00000fff) >> 0
        data = file.read(size)
        if type == 0x0f0:
            # for a crc tag only the first 4 data bytes enter the checksum
            crc = binascii.crc32(data[:4], crc)
        else:
            crc = binascii.crc32(data, crc)
        # a crc tag is marked 'bad!' when ~crc is non-zero; presumably this
        # relies on Python 2's signed crc32, where ~crc == 0 only for an
        # all-ones checksum -- TODO confirm
        print '%04x: %08x %-14s %3s %3d %-23s %-8s' % (
            off, tag,
            typeof(type) + (' bad!' if type == 0x0f0 and ~crc else ''),
            id if id != 0x3ff else '.', size,
            ' '.join('%02x' % ord(c) for c in data[:8]),
            ''.join(c if c >= ' ' and c <= '~' else '.' for c in data[:8]))
        off += tag & 0xfff
        if type == 0x0f0:
            # the checksum restarts from zero after every crc tag
            crc = 0
# Script entry point: every command-line argument is passed to main() as a
# block file path.
if __name__ == "__main__":
    import sys
    main(*sys.argv[1:])

View File

@@ -7,7 +7,7 @@ import os
import re import re
def main(): def main():
with open('blocks/config') as file: with open('blocks/.config') as file:
s = struct.unpack('<LLLL', file.read()) s = struct.unpack('<LLLL', file.read())
print 'read_size: %d' % s[0] print 'read_size: %d' % s[0]
print 'prog_size: %d' % s[1] print 'prog_size: %d' % s[1]
@@ -18,7 +18,7 @@ def main():
os.path.getsize(os.path.join('blocks', f)) os.path.getsize(os.path.join('blocks', f))
for f in os.listdir('blocks') if re.match('\d+', f)) for f in os.listdir('blocks') if re.match('\d+', f))
with open('blocks/stats') as file: with open('blocks/.stats') as file:
s = struct.unpack('<QQQ', file.read()) s = struct.unpack('<QQQ', file.read())
print 'read_count: %d' % s[0] print 'read_count: %d' % s[0]
print 'prog_count: %d' % s[1] print 'prog_count: %d' % s[1]

View File

@@ -71,24 +71,25 @@ echo "--- Sanity check ---"
rm -rf blocks rm -rf blocks
lfs_mktree lfs_mktree
lfs_chktree lfs_chktree
BLOCKS="$(ls blocks | grep -vw '[01]')"
echo "--- Block corruption ---" echo "--- Block corruption ---"
for i in {2..33} for b in $BLOCKS
do do
rm -rf blocks rm -rf blocks
mkdir blocks mkdir blocks
ln -s /dev/zero blocks/$(printf '%x' $i) ln -s /dev/zero blocks/$b
lfs_mktree lfs_mktree
lfs_chktree lfs_chktree
done done
echo "--- Block persistance ---" echo "--- Block persistance ---"
for i in {2..33} for b in $BLOCKS
do do
rm -rf blocks rm -rf blocks
mkdir blocks mkdir blocks
lfs_mktree lfs_mktree
chmod a-w blocks/$(printf '%x' $i) || true chmod a-w blocks/$b
lfs_mktree lfs_mktree
lfs_chktree lfs_chktree
done done
@@ -96,7 +97,7 @@ done
echo "--- Big region corruption ---" echo "--- Big region corruption ---"
rm -rf blocks rm -rf blocks
mkdir blocks mkdir blocks
for i in {2..255} for i in {2..512}
do do
ln -s /dev/zero blocks/$(printf '%x' $i) ln -s /dev/zero blocks/$(printf '%x' $i)
done done
@@ -106,7 +107,7 @@ lfs_chktree
echo "--- Alternating corruption ---" echo "--- Alternating corruption ---"
rm -rf blocks rm -rf blocks
mkdir blocks mkdir blocks
for i in {2..511..2} for i in {2..1024..2}
do do
ln -s /dev/zero blocks/$(printf '%x' $i) ln -s /dev/zero blocks/$(printf '%x' $i)
done done

View File

@@ -59,7 +59,7 @@ tests/test.py << TEST
lfs_rename(&lfs, "b/hello", "c/hello") => 0; lfs_rename(&lfs, "b/hello", "c/hello") => 0;
lfs_unmount(&lfs) => 0; lfs_unmount(&lfs) => 0;
TEST TEST
tests/corrupt.py blocks/{4,5} tests/corrupt.py -n 1
tests/test.py << TEST tests/test.py << TEST
lfs_mount(&lfs, &cfg) => 0; lfs_mount(&lfs, &cfg) => 0;
lfs_dir_open(&lfs, &dir[0], "b") => 0; lfs_dir_open(&lfs, &dir[0], "b") => 0;
@@ -86,8 +86,7 @@ tests/test.py << TEST
lfs_rename(&lfs, "c/hello", "d/hello") => 0; lfs_rename(&lfs, "c/hello", "d/hello") => 0;
lfs_unmount(&lfs) => 0; lfs_unmount(&lfs) => 0;
TEST TEST
tests/corrupt.py blocks/{6,7} tests/corrupt.py -n 2
tests/corrupt.py blocks/{8,9}
tests/test.py << TEST tests/test.py << TEST
lfs_mount(&lfs, &cfg) => 0; lfs_mount(&lfs, &cfg) => 0;
lfs_dir_open(&lfs, &dir[0], "c") => 0; lfs_dir_open(&lfs, &dir[0], "c") => 0;
@@ -166,7 +165,7 @@ tests/test.py << TEST
lfs_rename(&lfs, "b/hi", "c/hi") => 0; lfs_rename(&lfs, "b/hi", "c/hi") => 0;
lfs_unmount(&lfs) => 0; lfs_unmount(&lfs) => 0;
TEST TEST
tests/corrupt.py blocks/{4,5} tests/corrupt.py -n 1
tests/test.py << TEST tests/test.py << TEST
lfs_mount(&lfs, &cfg) => 0; lfs_mount(&lfs, &cfg) => 0;
lfs_dir_open(&lfs, &dir[0], "b") => 0; lfs_dir_open(&lfs, &dir[0], "b") => 0;
@@ -193,8 +192,7 @@ tests/test.py << TEST
lfs_rename(&lfs, "c/hi", "d/hi") => 0; lfs_rename(&lfs, "c/hi", "d/hi") => 0;
lfs_unmount(&lfs) => 0; lfs_unmount(&lfs) => 0;
TEST TEST
tests/corrupt.py blocks/{6,7} tests/corrupt.py -n 2
tests/corrupt.py blocks/{8,9}
tests/test.py << TEST tests/test.py << TEST
lfs_mount(&lfs, &cfg) => 0; lfs_mount(&lfs, &cfg) => 0;
lfs_dir_open(&lfs, &dir[0], "c") => 0; lfs_dir_open(&lfs, &dir[0], "c") => 0;

View File

@@ -17,7 +17,7 @@ tests/test.py << TEST
TEST TEST
# corrupt most recent commit, this should be the update to the previous # corrupt most recent commit, this should be the update to the previous
# linked-list entry and should orphan the child # linked-list entry and should orphan the child
tests/corrupt.py blocks/{6,7} tests/corrupt.py
tests/test.py << TEST tests/test.py << TEST
lfs_mount(&lfs, &cfg) => 0; lfs_mount(&lfs, &cfg) => 0;