Fixed broken wear-leveling when block_cycles = 2n-1

This was an interesting issue found during a GitHub discussion with
rmollway and thrasher8390.

Blocks in the metadata-pair are relocated every "block_cycles", or, more
mathy, when rev % block_cycles == 0 as long as rev += 1 every block write.

But there's a problem, rev isn't += 1 every block write. There are two
blocks in a metadata-pair, so looking at it from each blocks
perspective, rev += 2 every block write.

This leads to a sort of aliasing issue, where, if block_cycles is
divisible by 2, one block in the metadata-pair is always relocated, and
the other block is _never_ relocated. Causing a complete failure of
block-level wear-leveling.

Fortunately, because of a previous workaround to avoid block_cycles = 1
(since this will cause the relocation algorithm to never terminate), the
actual math is rev % (block_cycles+1) == 0. This means the bug only
shows its head in the much less likely case where block_cycles is a
multiple of 2 plus 1, or, in more mathy terms, block_cycles = 2n+1 for
some n.

To workaround this we can bitwise or our block_cycles with 1 to force it
to never be a multiple of 2n.

(Maybe we should do this during initialization? But then block_cycles
would need to be mutable.)

---

There's a few unrelated changes mixed into this commit that shouldn't be
there since I added this as part of a branch of bug fixes I'm putting
together rather hastily, so unfortunately this is not easily cherry-pickable.
This commit is contained in:
Christopher Haster
2020-02-09 08:52:20 -06:00
parent 6a550844f4
commit fe957de892
2 changed files with 153 additions and 33 deletions

52
lfs.c
View File

@@ -1486,7 +1486,7 @@ static int lfs_dir_compact(lfs_t *lfs,
// increment revision count
uint32_t nrev = dir->rev + 1;
if (lfs->cfg->block_cycles > 0 &&
(nrev % (lfs->cfg->block_cycles+1) == 0)) {
(nrev % ((lfs->cfg->block_cycles+1)|1) == 0)) {
if (lfs_pair_cmp(dir->pair, (const lfs_block_t[2]){0, 1}) == 0) {
// oh no! we're writing too much to the superblock,
// should we expand?
@@ -1524,6 +1524,7 @@ static int lfs_dir_compact(lfs_t *lfs,
} else {
// we're writing too much, time to relocate
tired = true;
//nrev += 1;//lfs->cfg->block_cycles & 1; // TODO do this here?
goto relocate;
}
}
@@ -1631,17 +1632,33 @@ static int lfs_dir_compact(lfs_t *lfs,
// TODO huh?
LFS_ASSERT(commit.off % lfs->cfg->prog_size == 0);
// update gstate
lfs->gdelta = (lfs_gstate_t){0};
if (!relocated) {
lfs->gdisk = lfs->gstate;
}
// TODO here??
if (relocated) {
// update references if we relocated
LFS_DEBUG("Relocating %"PRIx32" %"PRIx32" -> %"PRIx32" %"PRIx32,
oldpair[0], oldpair[1], dir->pair[0], dir->pair[1]);
int err = lfs_fs_relocate(lfs, oldpair, dir->pair);
if (err) {
// TODO make better
//dir->pair[1] = oldpair[1]; //
return err;
}
// LFS_DEBUG("Relocated %"PRIx32" %"PRIx32" -> %"PRIx32" %"PRIx32,
// oldpair[0], oldpair[1], dir->pair[0], dir->pair[1]);
}
// successful compaction, swap dir pair to indicate most recent
lfs_pair_swap(dir->pair);
dir->rev = nrev;
dir->count = end - begin;
dir->off = commit.off;
dir->etag = commit.ptag;
// update gstate
lfs->gdelta = (lfs_gstate_t){0};
if (!relocated) {
lfs->gdisk = lfs->gstate;
}
}
break;
@@ -1669,20 +1686,6 @@ relocate:
continue;
}
// TODO here??
if (relocated) {
// update references if we relocated
LFS_DEBUG("Relocating %"PRIx32" %"PRIx32" -> %"PRIx32" %"PRIx32,
oldpair[0], oldpair[1], dir->pair[0], dir->pair[1]);
int err = lfs_fs_relocate(lfs, oldpair, dir->pair);
if (err) {
// TODO make better
//dir->pair[1] = oldpair[1]; //
return err;
}
LFS_DEBUG("Relocated %"PRIx32" %"PRIx32" -> %"PRIx32" %"PRIx32,
oldpair[0], oldpair[1], dir->pair[0], dir->pair[1]);
}
return 0;
}
@@ -1709,6 +1712,7 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir,
// calculate changes to the directory
bool hasdelete = false;
lfs_mdir_t olddir = *dir; // TODO is this a good idea?
for (int i = 0; i < attrcount; i++) {
if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_CREATE) {
dir->count += 1;
@@ -1729,6 +1733,7 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir,
lfs_mdir_t pdir;
int err = lfs_fs_pred(lfs, dir->pair, &pdir);
if (err && err != LFS_ERR_NOENT) {
*dir = olddir;
return err;
}
@@ -1761,6 +1766,7 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir,
if (err == LFS_ERR_NOSPC || err == LFS_ERR_CORRUPT) {
goto compact;
}
*dir = olddir;
return err;
}
@@ -1773,6 +1779,7 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir,
if (!lfs_gstate_iszero(&delta)) {
err = lfs_dir_getgstate(lfs, dir, &delta);
if (err) {
*dir = olddir;
return err;
}
@@ -1784,6 +1791,7 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir,
if (err == LFS_ERR_NOSPC || err == LFS_ERR_CORRUPT) {
goto compact;
}
*dir = olddir;
return err;
}
}
@@ -1794,6 +1802,7 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir,
if (err == LFS_ERR_NOSPC || err == LFS_ERR_CORRUPT) {
goto compact;
}
*dir = olddir;
return err;
}
@@ -1812,6 +1821,7 @@ compact:
int err = lfs_dir_compact(lfs, dir, attrs, attrcount,
dir, 0, dir->count);
if (err) {
*dir = olddir;
return err;
}
}
@@ -3845,7 +3855,7 @@ int lfs_fs_traverse(lfs_t *lfs,
LFS_TRACE("lfs_fs_traverse -> %d", err);
return err;
}
} else if (lfs_gstate_hasorphans(&lfs->gstate) &&
} else if (/*lfs_gstate_hasorphans(&lfs->gstate) && TODO maybe report size-dirs/2 ? */
lfs_tag_type3(tag) == LFS_TYPE_DIRSTRUCT) {
// TODO HMMMMMM HMMMMMMMMMMMMMMMMMMM
for (int i = 0; i < 2; i++) {

View File

@@ -333,3 +333,113 @@ exhausted:
// check we increased the lifetime by 2x with ~10% error
LFS_ASSERT(run_cycles[1]*110/100 > 2*run_cycles[0]);
'''
[[case]] # test that we wear blocks roughly evenly
define.LFS_ERASE_CYCLES = 0xffffffff
define.LFS_BLOCK_COUNT = 256 # small bd so test runs faster
define.LFS_BLOCK_CYCLES = [5, 4, 3, 2, 1]
#define.LFS_BLOCK_CYCLES = [4, 2]
define.CYCLES = 100
define.FILES = 10
code = '''
lfs_format(&lfs, &cfg) => 0;
lfs_mount(&lfs, &cfg) => 0;
lfs_mkdir(&lfs, "roadrunner") => 0;
lfs_unmount(&lfs) => 0;
uint32_t cycle = 0;
while (cycle < CYCLES) {
lfs_mount(&lfs, &cfg) => 0;
for (uint32_t i = 0; i < FILES; i++) {
// chose name, roughly random seed, and random 2^n size
sprintf(path, "roadrunner/test%d", i);
srand(cycle * i);
size = 1 << 4; //((rand() % 10)+2);
lfs_file_open(&lfs, &file, path,
LFS_O_WRONLY | LFS_O_CREAT | LFS_O_TRUNC) => 0;
for (lfs_size_t j = 0; j < size; j++) {
char c = 'a' + (rand() % 26);
lfs_ssize_t res = lfs_file_write(&lfs, &file, &c, 1);
assert(res == 1 || res == LFS_ERR_NOSPC);
if (res == LFS_ERR_NOSPC) {
goto exhausted;
}
}
err = lfs_file_close(&lfs, &file);
assert(err == 0 || err == LFS_ERR_NOSPC);
if (err == LFS_ERR_NOSPC) {
goto exhausted;
}
}
for (uint32_t i = 0; i < FILES; i++) {
// check for errors
sprintf(path, "roadrunner/test%d", i);
srand(cycle * i);
size = 1 << 4; //((rand() % 10)+2);
lfs_file_open(&lfs, &file, path, LFS_O_RDONLY) => 0;
for (lfs_size_t j = 0; j < size; j++) {
char c = 'a' + (rand() % 26);
char r;
lfs_file_read(&lfs, &file, &r, 1) => 1;
assert(r == c);
}
lfs_file_close(&lfs, &file) => 0;
}
lfs_unmount(&lfs) => 0;
cycle += 1;
}
exhausted:
// should still be readable
lfs_mount(&lfs, &cfg) => 0;
for (uint32_t i = 0; i < FILES; i++) {
// check for errors
sprintf(path, "roadrunner/test%d", i);
lfs_stat(&lfs, path, &info) => 0;
}
lfs_unmount(&lfs) => 0;
LFS_WARN("completed %d cycles", cycle);
// check the wear on our block device
lfs_testbd_wear_t minwear = -1;
lfs_testbd_wear_t totalwear = 0;
lfs_testbd_wear_t maxwear = 0;
// skip 0 and 1 as superblock movement is intentionally avoided
for (lfs_block_t b = 2; b < LFS_BLOCK_COUNT; b++) {
lfs_testbd_wear_t wear = lfs_testbd_getwear(&cfg, b);
printf("%08x: wear %d\n", b, wear);
assert(wear >= 0);
if (wear < minwear) {
minwear = wear;
}
if (wear > maxwear) {
maxwear = wear;
}
totalwear += wear;
}
lfs_testbd_wear_t avgwear = totalwear / LFS_BLOCK_COUNT;
LFS_WARN("max wear: %d cycles", maxwear);
LFS_WARN("avg wear: %d cycles", totalwear / LFS_BLOCK_COUNT);
LFS_WARN("min wear: %d cycles", minwear);
// find standard deviation^2
lfs_testbd_wear_t dev2 = 0;
for (lfs_block_t b = 2; b < LFS_BLOCK_COUNT; b++) {
lfs_testbd_wear_t wear = lfs_testbd_getwear(&cfg, b);
assert(wear >= 0);
lfs_testbd_swear_t diff = wear - avgwear;
dev2 += diff*diff;
}
dev2 /= totalwear;
LFS_WARN("std dev^2: %d", dev2);
assert(dev2 < 8);
'''