mirror of
				https://github.com/eledio-devices/thirdparty-littlefs.git
				synced 2025-10-31 16:14:16 +01:00 
			
		
		
		
	Adopted lfs_ctz_index implementation using popcount
This reduces the O(n^2logn) runtime to read a file to only O(nlogn). The extra O(n) did not touch the disk, so it isn't a problem until the files become very large, but this solution comes with very little cost. Long story short, you can find the block index + offset pair for a CTZ linked-list with this series of formulas: n' = floor(N / (B - 2w/8)); N' = (B - 2w/8)n' + (w/8)popcount(n'); off' = N - N'; (n, off) = (n'-1, off'+B) if off' < 0, or (n', off'+(w/8)(ctz(n')+1)) if off' >= 0. For the long story, you will need to see the updated DESIGN.md
This commit is contained in:
		
							
								
								
									
										109
									
								
								DESIGN.md
									
									
									
									
									
								
							
							
						
						
									
										109
									
								
								DESIGN.md
									
									
									
									
									
								
							| @@ -292,7 +292,7 @@ We can find the runtime complexity by looking at the path to any block from | |||||||
| the block containing the most pointers. Every step along the path divides | the block containing the most pointers. Every step along the path divides | ||||||
| the search space for the block in half. This gives us a runtime of O(logn). | the search space for the block in half. This gives us a runtime of O(logn). | ||||||
| To get to the block with the most pointers, we can perform the same steps | To get to the block with the most pointers, we can perform the same steps | ||||||
| backwards, which keeps the asymptotic runtime at O(log n). The interesting | backwards, which puts the runtime at O(2logn) = O(logn). The interesting | ||||||
| part about this data structure is that this optimal path occurs naturally | part about this data structure is that this optimal path occurs naturally | ||||||
| if we greedily choose the pointer that covers the most distance without passing | if we greedily choose the pointer that covers the most distance without passing | ||||||
| our target block. | our target block. | ||||||
| @@ -304,17 +304,18 @@ in a block, this is pretty reasonable. | |||||||
|  |  | ||||||
| Unfortunately, the CTZ skip-list comes with a few questions that aren't | Unfortunately, the CTZ skip-list comes with a few questions that aren't | ||||||
| straightforward to answer. What is the overhead? How do we handle more | straightforward to answer. What is the overhead? How do we handle more | ||||||
| pointers than we can store in a block? | pointers than we can store in a block? How do we store the skip-list in | ||||||
|  | a directory entry? | ||||||
|  |  | ||||||
| One way to find the overhead per block is to look at the data structure as | One way to find the overhead per block is to look at the data structure as | ||||||
| multiple layers of linked-lists. Each linked-list skips twice as many blocks | multiple layers of linked-lists. Each linked-list skips twice as many blocks | ||||||
| as the previous linked-list. Or another way of looking at it is that each  | as the previous linked-list. Another way of looking at it is that each  | ||||||
| linked-list uses half as much storage per block as the previous linked-list. | linked-list uses half as much storage per block as the previous linked-list. | ||||||
| As we approach infinity, the number of pointers per block forms a geometric | As we approach infinity, the number of pointers per block forms a geometric | ||||||
| series. Solving this geometric series gives us an average of only 2 pointers | series. Solving this geometric series gives us an average of only 2 pointers | ||||||
| per block. | per block. | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| Finding the maximum number of pointers in a block is a bit more complicated, | Finding the maximum number of pointers in a block is a bit more complicated, | ||||||
| but since our file size is limited by the integer width we use to store the | but since our file size is limited by the integer width we use to store the | ||||||
| @@ -322,7 +323,7 @@ size, we can solve for it. Setting the overhead of the maximum pointers equal | |||||||
| to the block size we get the following equation. Note that a smaller block size | to the block size we get the following equation. Note that a smaller block size | ||||||
| results in more pointers, and a larger word width results in larger pointers. | results in more pointers, and a larger word width results in larger pointers. | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| where:   | where:   | ||||||
| B = block size in bytes   | B = block size in bytes   | ||||||
| @@ -335,8 +336,102 @@ widths: | |||||||
|  |  | ||||||
| Since littlefs uses a 32 bit word size, we are limited to a minimum block | Since littlefs uses a 32 bit word size, we are limited to a minimum block | ||||||
| size of 104 bytes. This is a perfectly reasonable minimum block size, with most | size of 104 bytes. This is a perfectly reasonable minimum block size, with most | ||||||
| block sizes starting around 512 bytes. So we can avoid the additional logic | block sizes starting around 512 bytes. So we can avoid additional logic to | ||||||
| needed to avoid overflowing our block's capacity in the CTZ skip-list. | avoid overflowing our block's capacity in the CTZ skip-list. | ||||||
|  |  | ||||||
|  | So, how do we store the skip-list in a directory entry? A naive approach would | ||||||
|  | be to store a pointer to the head of the skip-list, the length of the file | ||||||
|  | in bytes, the index of the head block in the skip-list, and the offset in the | ||||||
|  | head block in bytes. However this is a lot of information, and we can observe | ||||||
|  | that a file size maps to only one block index + offset pair. So it should be | ||||||
|  | sufficient to store only the pointer and file size. | ||||||
|  |  | ||||||
|  | But there is one problem: calculating the block index + offset pair from a | ||||||
|  | file size doesn't have an obvious implementation. | ||||||
|  |  | ||||||
|  | We can start by just writing down an equation. The first idea that comes to | ||||||
|  | mind is to just use a for loop to sum together blocks until we reach our | ||||||
|  | file size. We can write this equation as a summation: | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | where:   | ||||||
|  | B = block size in bytes   | ||||||
|  | w = word width in bits   | ||||||
|  | n = block index in skip-list   | ||||||
|  | N = file size in bytes   | ||||||
|  |  | ||||||
|  | And this works quite well, but is not trivial to calculate. This equation | ||||||
|  | requires O(n) to compute, which brings the entire runtime of reading a file | ||||||
|  | to O(n^2logn). Fortunately, the additional O(n) does not need to touch disk, | ||||||
|  | so it is not completely unreasonable. But if we could solve this equation into | ||||||
|  | a form that is easily computable, we can avoid a big slowdown. | ||||||
|  |  | ||||||
|  | Unfortunately, the summation of the CTZ instruction presents a big challenge. | ||||||
|  | How would you even begin to reason about integrating a bitwise instruction? | ||||||
|  | Fortunately, there is a powerful tool I've found useful in these situations: | ||||||
|  | The [On-Line Encyclopedia of Integer Sequences (OEIS)](https://oeis.org/). | ||||||
|  | If we work out the first couple of values in our summation, we find that CTZ | ||||||
|  | maps to [A001511](https://oeis.org/A001511), and its partial summation maps | ||||||
|  | to [A005187](https://oeis.org/A005187), and surprisingly, both of these | ||||||
|  | sequences have relatively trivial equations! This leads us to the completely | ||||||
|  | unintuitive property: | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | where:   | ||||||
|  | ctz(i) = the number of trailing bits that are 0 in i   | ||||||
|  | popcount(i) = the number of bits that are 1 in i   | ||||||
|  |  | ||||||
|  | I find it bewildering that these two seemingly unrelated bitwise instructions | ||||||
|  | are related by this property. But if we start to dissect this equation we can | ||||||
|  | see that it does hold. As n approaches infinity, we do end up with an average | ||||||
|  | overhead of 2 pointers as we found earlier. And popcount seems to handle the | ||||||
|  | error from this average as it accumulates in the CTZ skip-list. | ||||||
|  |  | ||||||
|  | Now we can substitute into the original equation to get a trivial equation | ||||||
|  | for a file size: | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | Unfortunately, we're not quite done. The popcount function is non-injective, | ||||||
|  | so we can only find the file size from the block index, not the other way | ||||||
|  | around. However, we can guess and correct. Consider a block index n' that | ||||||
|  | is greater than or equal to n; we can find one pretty easily: | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | where:   | ||||||
|  | n' >= n   | ||||||
|  |  | ||||||
|  | We can plug n' back into our popcount equation to find an N' file size that | ||||||
|  | is greater than N. However, we need to rearrange our terms a bit to avoid | ||||||
|  | integer overflow: | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | where:   | ||||||
|  | N' >= N   | ||||||
|  |  | ||||||
|  | Now that we have N', we can find our block offset: | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | where:   | ||||||
|  | off' >= off, our byte offset in the block   | ||||||
|  |  | ||||||
|  | Now we're getting somewhere. N' is greater than or equal to N, and as long as | ||||||
|  | the number of pointers per block is bounded by the block size, it can only be | ||||||
|  | different by at most one block. So we have two cases that can be determined by | ||||||
|  | the sign of off'. If off' is negative, we correct n' and add a block to off'. | ||||||
|  | Note that we also need to incorporate the overhead of the last block to get | ||||||
|  | the right offset. | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | It's a lot of math, but computers are very good at math. With these equations | ||||||
|  | we can solve for the block index + offset in O(1) while only needing to store | ||||||
|  | the file size. | ||||||
|  |  | ||||||
| Here is what it might look like to update a file stored with a CTZ skip-list: | Here is what it might look like to update a file stored with a CTZ skip-list: | ||||||
| ``` | ``` | ||||||
|   | |||||||
							
								
								
									
										19
									
								
								lfs.c
									
									
									
									
									
								
							
							
						
						
									
										19
									
								
								lfs.c
									
									
									
									
									
								
							| @@ -1004,16 +1004,23 @@ int lfs_dir_rewind(lfs_t *lfs, lfs_dir_t *dir) { | |||||||
|  |  | ||||||
| /// File index list operations /// | /// File index list operations /// | ||||||
| static int lfs_ctz_index(lfs_t *lfs, lfs_off_t *off) { | static int lfs_ctz_index(lfs_t *lfs, lfs_off_t *off) { | ||||||
|     lfs_off_t i = 0; |     lfs_off_t size = *off; | ||||||
|  |     lfs_off_t i = size / (lfs->cfg->block_size-2*4); | ||||||
|     while (*off >= lfs->cfg->block_size) { |     if (i == 0) { | ||||||
|         i += 1; |         return 0; | ||||||
|         *off -= lfs->cfg->block_size; |  | ||||||
|         *off += 4*(lfs_ctz(i) + 1); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     lfs_off_t nsize = (lfs->cfg->block_size-2*4)*i + 4*lfs_popc(i-1) + 2*4; | ||||||
|  |     lfs_soff_t noff = size - nsize; | ||||||
|  |  | ||||||
|  |     if (noff < 0) { | ||||||
|  |         *off = noff + lfs->cfg->block_size; | ||||||
|  |         return i-1; | ||||||
|  |     } else { | ||||||
|  |         *off = noff + 4*(lfs_ctz(i) + 1); | ||||||
|         return i; |         return i; | ||||||
|     } |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| static int lfs_ctz_find(lfs_t *lfs, | static int lfs_ctz_find(lfs_t *lfs, | ||||||
|         lfs_cache_t *rcache, const lfs_cache_t *pcache, |         lfs_cache_t *rcache, const lfs_cache_t *pcache, | ||||||
|   | |||||||
| @@ -41,6 +41,10 @@ static inline uint32_t lfs_npw2(uint32_t a) { | |||||||
|     return 32 - __builtin_clz(a-1); |     return 32 - __builtin_clz(a-1); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | static inline uint32_t lfs_popc(uint32_t a) { | ||||||
|  |     return __builtin_popcount(a); | ||||||
|  | } | ||||||
|  |  | ||||||
| static inline int lfs_scmp(uint32_t a, uint32_t b) { | static inline int lfs_scmp(uint32_t a, uint32_t b) { | ||||||
|     return (int)(unsigned)(a - b); |     return (int)(unsigned)(a - b); | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user