diff options
author | edef <edef@unfathomable.blue> | 2022-05-02 21:09:10 +0000 |
---|---|---|
committer | edef <edef@unfathomable.blue> | 2022-05-02 21:09:10 +0000 |
commit | bab70406b61e731dd79b8a12ce35772d0f46226b (patch) | |
tree | e28995a539fe7a35056aa6c3c0e55eb03e2a2550 /ripple/fossil/src | |
parent | 832099e432fd3d702681b3c1d852ae99b3bb7d2e (diff) | |
download | unf-legacy-bab70406b61e731dd79b8a12ce35772d0f46226b.tar.zst |
ripple/fossil/chunker: iterate smarter
This drops the manual `len <= MIN_CHUNK_SIZE` check and instead folds it into acquiring the to-be-scanned chunk. The pointer-based design doesn't need the iterator to be enumerated from the start of the buffer, so we don't need to use take/skip. Throughput improves by about 5%.
Change-Id: Ic430c7afde68bf1acfba1a2137a0b8ac064176ea
Diffstat (limited to 'ripple/fossil/src')
-rw-r--r-- | ripple/fossil/src/chunker/mod.rs | 17 |
1 files changed, 10 insertions, 7 deletions
diff --git a/ripple/fossil/src/chunker/mod.rs b/ripple/fossil/src/chunker/mod.rs index 59b3367..0e1bdc9 100644 --- a/ripple/fossil/src/chunker/mod.rs +++ b/ripple/fossil/src/chunker/mod.rs @@ -42,17 +42,21 @@ impl<'a> Iterator for Chunker<'a> { return None; } - if self.buffer.len() <= MIN_CHUNK_SIZE { - return Some(mem::take(&mut self.buffer)); - } + let max_len = MAX_CHUNK_SIZE.min(self.buffer.len()); + let bytes = match self.buffer.get(MIN_CHUNK_SIZE..max_len) { + None | Some(&[]) => { + return Some(mem::take(&mut self.buffer)); + } + Some(bytes) => bytes, + }; - let bytes = self.buffer.iter().take(MAX_CHUNK_SIZE).skip(MIN_CHUNK_SIZE); let mut hasher = unsafe { // SAFETY: `self.buffer.len > MIN_CHUNK_SIZE`, so this is in bounds buz::Rolling::<WINDOW_SIZE>::from_slice_unchecked( self.buffer.get_unchecked(..MIN_CHUNK_SIZE), ) }; + for byte in bytes { let buz::Hash(x) = hasher.sum(); if x % DISCRIMINATOR == DISCRIMINATOR.wrapping_sub(1) { @@ -70,9 +74,8 @@ impl<'a> Iterator for Chunker<'a> { } Some(unsafe { - // SAFETY: `idx` is clamped to `self.buffer.len()` - let idx = MAX_CHUNK_SIZE.min(self.buffer.len()); - self.cut(idx) + // SAFETY: `max_len` is clamped to `self.buffer.len()` + self.cut(max_len) }) } |