Fixed broken wear-leveling when block_cycles = 2n-1

This was an interesting issue found during a GitHub discussion with
rmollway and thrasher8390.

Blocks in the metadata-pair are relocated every "block_cycles", or, more
mathy, when rev % block_cycles == 0 as long as rev += 1 every block write.

But there's a problem, rev isn't += 1 every block write. There are two
blocks in a metadata-pair, so looking at it from each blocks
perspective, rev += 2 every block write.

This leads to a sort of aliasing issue, where, if block_cycles is
divisible by 2, one block in the metadata-pair is always relocated, and
the other block is _never_ relocated. Causing a complete failure of
block-level wear-leveling.

Fortunately, because of a previous workaround to avoid block_cycles = 1
(since this will cause the relocation algorithm to never terminate), the
actual math is rev % (block_cycles+1) == 0. This means the bug only
shows its head in the much less likely case where block_cycles is a
multiple of 2 plus 1, or, in more mathy terms, block_cycles = 2n+1 for
some n.

To workaround this we can bitwise or our block_cycles with 1 to force it
to never be a multiple of 2n.

(Maybe we should do this during initialization? But then block_cycles
would need to be mutable.)

---

There's a few unrelated changes mixed into this commit that shouldn't be
there since I added this as part of a branch of bug fixes I'm putting
together rather hastily, so unfortunately this is not easily cherry-pickable.
This commit is contained in:
Christopher Haster
2020-02-09 08:52:20 -06:00
parent 6a550844f4
commit fe957de892
2 changed files with 153 additions and 33 deletions

View File

@@ -42,7 +42,7 @@ code = '''
if (err == LFS_ERR_NOSPC) {
goto exhausted;
}
}
}
for (uint32_t i = 0; i < FILES; i++) {
// check for errors
@@ -59,7 +59,7 @@ code = '''
}
lfs_file_close(&lfs, &file) => 0;
}
}
lfs_unmount(&lfs) => 0;
cycle += 1;
@@ -72,7 +72,7 @@ exhausted:
// check for errors
sprintf(path, "roadrunner/test%d", i);
lfs_stat(&lfs, path, &info) => 0;
}
}
lfs_unmount(&lfs) => 0;
LFS_WARN("completed %d cycles", cycle);
@@ -120,7 +120,7 @@ code = '''
if (err == LFS_ERR_NOSPC) {
goto exhausted;
}
}
}
for (uint32_t i = 0; i < FILES; i++) {
// check for errors
@@ -137,7 +137,7 @@ code = '''
}
lfs_file_close(&lfs, &file) => 0;
}
}
lfs_unmount(&lfs) => 0;
cycle += 1;
@@ -150,7 +150,7 @@ exhausted:
// check for errors
sprintf(path, "test%d", i);
lfs_stat(&lfs, path, &info) => 0;
}
}
lfs_unmount(&lfs) => 0;
LFS_WARN("completed %d cycles", cycle);
@@ -207,7 +207,7 @@ code = '''
if (err == LFS_ERR_NOSPC) {
goto exhausted;
}
}
}
for (uint32_t i = 0; i < FILES; i++) {
// check for errors
@@ -224,7 +224,7 @@ code = '''
}
lfs_file_close(&lfs, &file) => 0;
}
}
lfs_unmount(&lfs) => 0;
cycle += 1;
@@ -237,7 +237,7 @@ exhausted:
// check for errors
sprintf(path, "roadrunner/test%d", i);
lfs_stat(&lfs, path, &info) => 0;
}
}
lfs_unmount(&lfs) => 0;
run_cycles[run] = cycle;
@@ -292,7 +292,7 @@ code = '''
if (err == LFS_ERR_NOSPC) {
goto exhausted;
}
}
}
for (uint32_t i = 0; i < FILES; i++) {
// check for errors
@@ -309,7 +309,7 @@ code = '''
}
lfs_file_close(&lfs, &file) => 0;
}
}
lfs_unmount(&lfs) => 0;
cycle += 1;
@@ -322,7 +322,7 @@ exhausted:
// check for errors
sprintf(path, "test%d", i);
lfs_stat(&lfs, path, &info) => 0;
}
}
lfs_unmount(&lfs) => 0;
run_cycles[run] = cycle;
@@ -333,3 +333,113 @@ exhausted:
// check we increased the lifetime by 2x with ~10% error
LFS_ASSERT(run_cycles[1]*110/100 > 2*run_cycles[0]);
'''
[[case]] # test that we wear blocks roughly evenly
define.LFS_ERASE_CYCLES = 0xffffffff
define.LFS_BLOCK_COUNT = 256 # small bd so test runs faster
define.LFS_BLOCK_CYCLES = [5, 4, 3, 2, 1]
#define.LFS_BLOCK_CYCLES = [4, 2]
define.CYCLES = 100
define.FILES = 10
code = '''
lfs_format(&lfs, &cfg) => 0;
lfs_mount(&lfs, &cfg) => 0;
lfs_mkdir(&lfs, "roadrunner") => 0;
lfs_unmount(&lfs) => 0;
uint32_t cycle = 0;
while (cycle < CYCLES) {
lfs_mount(&lfs, &cfg) => 0;
for (uint32_t i = 0; i < FILES; i++) {
// chose name, roughly random seed, and random 2^n size
sprintf(path, "roadrunner/test%d", i);
srand(cycle * i);
size = 1 << 4; //((rand() % 10)+2);
lfs_file_open(&lfs, &file, path,
LFS_O_WRONLY | LFS_O_CREAT | LFS_O_TRUNC) => 0;
for (lfs_size_t j = 0; j < size; j++) {
char c = 'a' + (rand() % 26);
lfs_ssize_t res = lfs_file_write(&lfs, &file, &c, 1);
assert(res == 1 || res == LFS_ERR_NOSPC);
if (res == LFS_ERR_NOSPC) {
goto exhausted;
}
}
err = lfs_file_close(&lfs, &file);
assert(err == 0 || err == LFS_ERR_NOSPC);
if (err == LFS_ERR_NOSPC) {
goto exhausted;
}
}
for (uint32_t i = 0; i < FILES; i++) {
// check for errors
sprintf(path, "roadrunner/test%d", i);
srand(cycle * i);
size = 1 << 4; //((rand() % 10)+2);
lfs_file_open(&lfs, &file, path, LFS_O_RDONLY) => 0;
for (lfs_size_t j = 0; j < size; j++) {
char c = 'a' + (rand() % 26);
char r;
lfs_file_read(&lfs, &file, &r, 1) => 1;
assert(r == c);
}
lfs_file_close(&lfs, &file) => 0;
}
lfs_unmount(&lfs) => 0;
cycle += 1;
}
exhausted:
// should still be readable
lfs_mount(&lfs, &cfg) => 0;
for (uint32_t i = 0; i < FILES; i++) {
// check for errors
sprintf(path, "roadrunner/test%d", i);
lfs_stat(&lfs, path, &info) => 0;
}
lfs_unmount(&lfs) => 0;
LFS_WARN("completed %d cycles", cycle);
// check the wear on our block device
lfs_testbd_wear_t minwear = -1;
lfs_testbd_wear_t totalwear = 0;
lfs_testbd_wear_t maxwear = 0;
// skip 0 and 1 as superblock movement is intentionally avoided
for (lfs_block_t b = 2; b < LFS_BLOCK_COUNT; b++) {
lfs_testbd_wear_t wear = lfs_testbd_getwear(&cfg, b);
printf("%08x: wear %d\n", b, wear);
assert(wear >= 0);
if (wear < minwear) {
minwear = wear;
}
if (wear > maxwear) {
maxwear = wear;
}
totalwear += wear;
}
lfs_testbd_wear_t avgwear = totalwear / LFS_BLOCK_COUNT;
LFS_WARN("max wear: %d cycles", maxwear);
LFS_WARN("avg wear: %d cycles", totalwear / LFS_BLOCK_COUNT);
LFS_WARN("min wear: %d cycles", minwear);
// find standard deviation^2
lfs_testbd_wear_t dev2 = 0;
for (lfs_block_t b = 2; b < LFS_BLOCK_COUNT; b++) {
lfs_testbd_wear_t wear = lfs_testbd_getwear(&cfg, b);
assert(wear >= 0);
lfs_testbd_swear_t diff = wear - avgwear;
dev2 += diff*diff;
}
dev2 /= totalwear;
LFS_WARN("std dev^2: %d", dev2);
assert(dev2 < 8);
'''