From bab70406b61e731dd79b8a12ce35772d0f46226b Mon Sep 17 00:00:00 2001
From: edef
Date: Mon, 2 May 2022 21:09:10 +0000
Subject: ripple/fossil/chunker: iterate smarter

This drops the manual `len <= MIN_CHUNK_SIZE` check, and instead
combines it into acquiring the to-be-scanned chunk.

The pointer-based design doesn't need the iterator to be enumerated
from the start of the buffer, so we don't need to use take/skip.

Throughput improves about 5%.

Change-Id: Ic430c7afde68bf1acfba1a2137a0b8ac064176ea
---
 ripple/fossil/src/chunker/mod.rs | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'ripple/fossil/src/chunker')

diff --git a/ripple/fossil/src/chunker/mod.rs b/ripple/fossil/src/chunker/mod.rs
index 59b3367..0e1bdc9 100644
--- a/ripple/fossil/src/chunker/mod.rs
+++ b/ripple/fossil/src/chunker/mod.rs
@@ -42,17 +42,21 @@ impl<'a> Iterator for Chunker<'a> {
             return None;
         }
 
-        if self.buffer.len() <= MIN_CHUNK_SIZE {
-            return Some(mem::take(&mut self.buffer));
-        }
+        let max_len = MAX_CHUNK_SIZE.min(self.buffer.len());
+        let bytes = match self.buffer.get(MIN_CHUNK_SIZE..max_len) {
+            None | Some(&[]) => {
+                return Some(mem::take(&mut self.buffer));
+            }
+            Some(bytes) => bytes,
+        };
 
-        let bytes = self.buffer.iter().take(MAX_CHUNK_SIZE).skip(MIN_CHUNK_SIZE);
         let mut hasher = unsafe {
             // SAFETY: `self.buffer.len > MIN_CHUNK_SIZE`, so this is in bounds
             buz::Rolling::::from_slice_unchecked(
                 self.buffer.get_unchecked(..MIN_CHUNK_SIZE),
             )
         };
+
         for byte in bytes {
             let buz::Hash(x) = hasher.sum();
             if x % DISCRIMINATOR == DISCRIMINATOR.wrapping_sub(1) {
@@ -70,9 +74,8 @@ impl<'a> Iterator for Chunker<'a> {
         }
 
         Some(unsafe {
-            // SAFETY: `idx` is clamped to `self.buffer.len()`
-            let idx = MAX_CHUNK_SIZE.min(self.buffer.len());
-            self.cut(idx)
+            // SAFETY: `max_len` is clamped to `self.buffer.len()`
+            self.cut(max_len)
         })
     }
 
-- 
cgit 1.4.1