summary refs log tree commit diff
path: root/ripple/fossil/src
diff options
context:
space:
mode:
author edef <edef@unfathomable.blue> 2022-05-01 15:48:42 +0000
committer edef <edef@unfathomable.blue> 2022-05-01 15:48:42 +0000
commit a53b951b5597ed1be46946073839032b0e2116df (patch)
tree ac7a603629a78a5b997a992b5a7bdeebf98a8993 /ripple/fossil/src
parent d1f00e012bc4db37f54cdcad24356df01026f812 (diff)
download unf-legacy-a53b951b5597ed1be46946073839032b0e2116df.tar.zst
ripple/fossil/chunker: handle and test boundary condition correctness
This ensures that MIN_CHUNK_SIZE-sized chunks can actually be emitted,
and adds tests for both MIN_CHUNK_SIZE and MAX_CHUNK_SIZE chunks.

The behaviour for all cases now verifiably matches casync.

Change-Id: Ie0bfaf50ec02658069da83ebb30210e6e1963de6
Diffstat (limited to 'ripple/fossil/src')
-rw-r--r-- ripple/fossil/src/chunker/mod.rs 38
1 files changed, 36 insertions, 2 deletions
diff --git a/ripple/fossil/src/chunker/mod.rs b/ripple/fossil/src/chunker/mod.rs
index e52e49a..3bc160e 100644
--- a/ripple/fossil/src/chunker/mod.rs
+++ b/ripple/fossil/src/chunker/mod.rs
@@ -48,13 +48,13 @@ impl<'a> Iterator for Chunker<'a> {
 		let mut hasher = buz::Rolling::<48>::from_slice(&self.buffer[..MIN_CHUNK_SIZE]);
 		let chunk;
 		for (idx, byte) in bytes {
-			hasher.push(byte);
 			let buz::Hash(x) = hasher.sum();
 			if x % d == d.wrapping_sub(1) {
 				// split point
-				(chunk, self.buffer) = self.buffer.split_at(idx + 1);
+				(chunk, self.buffer) = self.buffer.split_at(idx);
 				return Some(chunk);
 			}
+			hasher.push(byte);
 		}
 
 		(chunk, self.buffer) = self.buffer.split_at(MAX_CHUNK_SIZE.min(self.buffer.len()));
@@ -161,4 +161,38 @@ mod test {
 			assert_eq!(actual, blake3::Hash::from(expected));
 		}
 	}
+
+	#[test]
+	fn all_zeroes() {
+		assert_eq!(
+			super::Chunker::from(&[0u8; super::MAX_CHUNK_SIZE + 1])
+				.next()
+				.unwrap()
+				.len(),
+			super::MAX_CHUNK_SIZE
+		);
+	}
+
+	#[test]
+	fn min_chunk() {
+		let data = generate(1024 * 32);
+
+		// a "tail" is a 48-byte sequence that terminates a chunk
+		// we extract one from our test vectors, since every chunk
+		// smaller than MAX_CHUNK_SIZE ends in a tail
+		let mut tail = [0; 48];
+		tail.copy_from_slice({
+			let chunk = super::Chunker::from(&data).next().unwrap();
+			chunk.rchunks_exact(48).next().unwrap()
+		});
+
+		let mut data = vec![0; super::MIN_CHUNK_SIZE - 48];
+		data.extend(tail);
+		data.push(0);
+
+		assert_eq!(
+			super::Chunker::from(&data).next().unwrap().len(),
+			super::MIN_CHUNK_SIZE
+		);
+	}
 }