// SPDX-FileCopyrightText: edef // SPDX-License-Identifier: OSL-3.0 use std::mem; mod buz; const WINDOW_SIZE: usize = 48; pub const MIN_CHUNK_SIZE: usize = AVG_CHUNK_SIZE / 4; pub const AVG_CHUNK_SIZE: usize = 64 * 1024; pub const MAX_CHUNK_SIZE: usize = AVG_CHUNK_SIZE * 4; const DISCRIMINATOR: u32 = 0xc17f; #[cfg(test)] fn discriminator_from_average(avg: u64) -> u32 { let avg = avg as f64; (avg / (-1.42888852e-7 * avg + 1.33237515)) as u32 } pub struct Chunker<'a> { buffer: &'a [u8], } impl<'a> Chunker<'a> { pub fn from(buffer: &'a [u8]) -> Chunker<'a> { Chunker { buffer } } } impl<'a> Iterator for Chunker<'a> { type Item = &'a [u8]; fn next(&mut self) -> Option<&'a [u8]> { if self.buffer.is_empty() { return None; } if self.buffer.len() <= MIN_CHUNK_SIZE { return Some(mem::take(&mut self.buffer)); } let bytes = self .buffer .iter() .cloned() .enumerate() .take(MAX_CHUNK_SIZE) .skip(MIN_CHUNK_SIZE); let mut hasher = buz::Rolling::::from_slice(&self.buffer[..MIN_CHUNK_SIZE]); let chunk; for (idx, byte) in bytes { let buz::Hash(x) = hasher.sum(); if x % DISCRIMINATOR == DISCRIMINATOR.wrapping_sub(1) { // split point (chunk, self.buffer) = self.buffer.split_at(idx); return Some(chunk); } hasher.push(byte); } (chunk, self.buffer) = self.buffer.split_at(MAX_CHUNK_SIZE.min(self.buffer.len())); Some(chunk) } fn size_hint(&self) -> (usize, Option) { let min = (self.buffer.len() + MAX_CHUNK_SIZE - 1) / MAX_CHUNK_SIZE; let max = (self.buffer.len() + MIN_CHUNK_SIZE - 1) / MIN_CHUNK_SIZE; (min, Some(max)) } } #[cfg(test)] mod test { use { super::{ discriminator_from_average, Chunker, AVG_CHUNK_SIZE, DISCRIMINATOR, MAX_CHUNK_SIZE, MIN_CHUNK_SIZE, WINDOW_SIZE, }, std::io::Read, }; fn generate(length: usize) -> Vec { let mut h = blake3::Hasher::new(); h.update(b"test vector"); let mut buf = vec![0; length]; h.finalize_xof().read_exact(&mut buf).unwrap(); buf } const GOLDEN: &[[u8; 32]] = &[ *b"\x66\x77\xd1\xf7\x78\xbf\x7b\x42\x91\x15\x16\xda\xde\xae\x69\x17\x59\xe9\x73\xe8\x5b\xa2\xa0\x92\x6f\x98\x76\x8c\xbb\xb4\x74\xc7", *b"\xad\xd6\xc0\x6e\x0e\xef\xb5\x3b\x1f\x8b\xeb\xac\x21\xa8\xf7\x0c\xc9\xeb\xc8\x20\x22\xf0\xfe\x0a\x72\x15\x15\x21\x32\x7d\xe5\x0a", *b"\xbc\xd0\x68\x57\x0a\x66\x70\xf3\x3e\x81\xc9\xa8\x7d\xac\xd9\xde\x80\xb9\x8a\x19\x8d\x54\xcd\xd7\x5e\x76\xca\x0f\xb0\x0d\x53\x11", *b"\x91\x16\x6f\x72\x92\x3d\x00\xbb\x46\x82\x7a\xc5\x35\xda\x0c\x6e\x20\xbc\x1a\xb0\xb8\x40\x58\x0b\xf4\xc4\xe3\xbe\xb7\x81\xeb\xd8", *b"\x51\x59\xea\xdf\xd1\x41\x38\x11\xa9\x91\xb3\xf4\x91\x8c\x7b\x61\x38\x4a\x04\x59\x1f\xe9\xc1\xa6\x01\x19\x32\x5b\xfa\x3b\x24\x92", *b"\x7c\x64\x0c\xa5\xe8\x3a\x25\xeb\xaf\xe3\x1e\x5b\x58\xd4\x8b\xb8\x0a\xa5\x50\x5d\x18\x8a\x79\x38\xe3\x71\x61\x6d\x24\x4b\x0d\x07", *b"\x6c\x4b\xc6\x17\xf2\xaf\x9b\xa6\x81\xd4\x1b\xf1\x77\xce\xcf\x0b\xb7\xbf\x3a\x54\xc7\x26\x5f\x8c\x43\x5a\x40\x61\xf5\x5c\x47\x75", *b"\x98\x74\xd1\x45\x30\xe9\x47\x0c\xcf\x76\x52\x65\x85\x96\xf6\x90\x6f\x1d\x84\x24\xe1\x4f\x08\x55\x03\xf8\xe8\x8f\x10\xb4\xec\xc8", *b"\xd2\x87\x27\x3d\xa5\x7d\x31\x75\x56\x82\xb7\x13\x4c\xea\x68\xbf\xc4\x1c\x0b\xb9\xce\x64\xb1\x90\xfb\xde\x2a\xd8\xce\xc0\x1b\x2c", *b"\x68\xbd\xfb\x6c\x99\xf5\x3e\x82\xf6\x9e\x6a\xa6\x01\x56\x00\x3f\xab\x8e\x9c\xf7\x98\x93\x1a\x70\xef\x2b\x19\x5b\x1c\xf0\x1e\x38", *b"\xae\x9f\xa2\x52\x89\x7b\xda\xcf\x65\x09\x93\xd1\xe0\x96\x73\xb6\xfb\xa6\x0f\x58\x02\xec\x79\x22\xf8\xa7\xea\xe8\x16\x60\x6d\x1b", *b"\x8b\x9e\x83\xc2\x45\x75\x9c\x3a\xf4\x0f\x4b\xb4\xbd\x82\xa1\x62\x66\xde\xd3\x2f\xda\xf6\xfc\x12\xc9\x57\x40\xe2\x18\x90\x89\x67", *b"\x6b\x0d\x3f\x9b\xec\x24\xb2\xf7\x90\x8c\x75\x28\x57\xc9\x6f\x9e\xa7\x78\x94\x31\xaf\xa4\xa4\x61\x9c\x6e\xa7\xf9\x64\x60\x48\xcd", *b"\xbd\x04\x1a\x0c\x3e\xe2\x3e\x3a\xe6\x6c\x5b\xcd\x8d\x8a\x1e\xfe\x4d\xd5\xac\x14\xd5\xe6\x59\x40\x0e\xa0\xcf\x75\x32\xc6\xb6\x7a", *b"\x66\x73\x94\x96\x23\x81\x69\x26\x34\x94\x4d\x20\xf3\x6d\x67\x3d\xb6\xb9\x9b\xc7\xa7\x7c\x22\xc3\x56\xd1\x58\xe5\x4e\x55\x0a\x33", *b"\x26\xdd\xfc\x31\xac\xfc\xd2\x57\x9d\x52\x65\xdc\xf3\xe0\x8a\xde\x13\x04\x5f\xee\xc4\xf4\x35\xce\xb6\xcf\xd1\x7f\xc0\xc5\x4b\x7f", *b"\xe2\xd5\x6a\xb7\xdb\x71\x1d\x4d\xb2\xb4\xdb\xee\x94\x4c\xac\x92\x6e\xdc\xf3\xfd\x92\x44\xf1\x79\x66\x2a\x1e\x70\xd6\x2e\xb4\x6b", *b"\x86\x5d\x5f\x60\x75\x3d\xff\x7b\xac\x50\xa3\x11\x82\x7a\xb8\xfe\xde\xbc\xb1\x0a\xfd\xea\xa8\x08\xaa\x28\xfc\x95\x72\x04\x72\x13", *b"\xa6\x2d\xcc\x3a\x2e\x8e\xe9\xf2\x87\xc7\xcc\xa8\xc8\xa3\x18\x33\xc9\x43\x31\x4a\xab\xfe\xaf\xc1\x1d\xc6\x22\x3d\xf3\x54\x45\xe2", *b"\x75\x1d\x1f\x3d\x94\x8e\x7d\x27\x22\x7f\x1a\x0e\xcc\x61\xef\xbd\x41\x16\x53\xb9\x5b\x78\x05\x06\x80\x60\xa3\x17\x4f\x01\x10\x51", *b"\x4e\x0f\x4d\x71\x65\x10\xa5\x81\xff\xfe\x62\x6b\x15\x94\x17\x4c\x6d\x12\x85\xbe\x74\xcb\x56\x04\xc7\x38\x8f\xb7\x6e\xaa\xfd\x12", *b"\xe4\xb8\x64\x03\x92\xe5\x71\x89\x31\xfc\xc7\xe9\xee\xcd\x92\xfb\x92\xb0\x1d\x32\x34\xa1\x91\xd4\x7f\x5b\x8b\x37\xb8\x60\x2d\x6a", *b"\xc6\x3c\x17\xc0\x94\x0d\x16\xec\xc6\x67\x1f\xd1\x7a\xec\x12\x10\xf3\xf6\x1e\x59\x4b\x29\xab\x1c\xd4\xf6\xd1\x2e\x8a\xd7\x42\xc1", *b"\x8d\x59\x07\x06\x8c\x84\x68\x2a\x02\xb0\xe3\x2a\x88\x64\x28\x1a\xbf\x58\x74\xb5\xf9\x75\x5f\xd8\xc8\x4d\xa6\x9b\xd3\x40\x19\x05", *b"\x45\xa7\xc6\x21\xb3\x14\x14\x95\x74\x2e\xa4\xdc\x1f\x11\xe0\x77\xd5\xf3\x68\xe2\xc1\x9a\xb9\x83\xed\xd4\xf6\x77\x83\x09\x8f\x54", *b"\x94\x79\x72\xb1\x57\x2a\xb5\x2d\x81\x87\x9b\x32\xa8\xbf\xa8\xb6\x69\xff\x82\x69\x37\x69\x56\x7f\x26\xa4\xf2\x6c\x9a\xfd\x7e\x1a", *b"\x6f\x7f\x6d\xf1\x1f\xbb\x57\x2b\x2f\x29\x9f\x3d\x4e\x00\xd3\x75\x9b\xf2\x9f\x5d\xd4\xe5\x07\x9e\x46\xe0\x73\x3e\x68\x52\xa4\x99", *b"\xbb\x38\x7e\xc3\x7b\x3f\xe4\x9d\x05\xcc\x38\xa0\xe2\x3e\xb0\x95\x23\x43\x92\x2b\x83\x77\x10\xe4\x33\x7d\xe9\x75\xac\xdd\xe4\xcf", *b"\x32\x36\x32\x5b\x9f\xa9\x47\xd3\xfb\xca\xc0\x4e\x3d\x4a\xa6\xb8\x42\x16\x91\x43\xf2\x20\x04\x8d\x12\xb9\xb2\xa4\xc8\x97\xff\x99", *b"\x91\x11\xf3\xae\x36\x81\x03\xa5\x11\x74\xcd\x39\x06\x38\x1b\xa6\x87\x06\x28\x8a\x8a\x12\xbd\x35\xf6\x3c\xfe\xa9\x4a\x5b\x4e\x99", *b"\x68\x58\x3d\x1b\x52\xcb\x10\x28\x13\x1c\x99\x3b\xba\x09\x0b\x61\xc1\x51\x16\x03\xd9\x53\xba\x92\x6b\x45\x52\x8b\x90\x3c\xe1\x64", *b"\x5c\x26\xc4\xc9\x46\x08\x4d\x5d\xd2\xd7\x1e\xc7\xab\x95\xe8\xa6\xc1\x03\xff\x1f\x48\x1f\x2a\x8f\xe6\xa7\x05\x3c\xbb\xde\x9e\xbc", *b"\xf2\x25\xf0\xee\xd0\xa8\x2c\x50\x1b\x02\xaf\xd4\x56\x7b\x45\xea\x10\xf6\x8f\xb7\x2c\xe2\xe7\x88\xc6\x52\x91\x5d\xfe\x18\x4a\x0e", *b"\xb0\x38\xfe\x27\x98\xc4\x4d\xad\xfa\x07\x09\xa4\x40\xc8\x5a\x85\x69\xb5\xbd\xf3\xac\xb2\x82\x2e\x55\x22\xb4\x19\xf6\x30\xde\x82", *b"\x5f\x98\x00\xfd\x1d\x88\xf0\x00\x39\x20\x70\x5c\x2b\x81\xb3\x2d\x31\x7c\x1c\xa8\xcc\x21\x09\xb5\xfc\xd0\xae\x59\xdb\x45\xbf\x03", *b"\xf5\x92\xf4\x5f\xb9\x29\x1c\xc1\xee\xf1\x12\xf8\x0d\xc4\x92\x3a\xc8\x45\x5b\x1e\x2b\xae\xfc\xb4\x35\x21\x15\xcf\xc8\xc8\x3b\x19", *b"\xc2\x2e\x11\x46\x01\x5f\x0c\xa2\x6a\xe1\x13\x6d\x30\xd4\x86\xdf\xdd\xe0\x30\x8f\x3e\xaa\x1c\xd3\xf3\xda\x4c\x72\xdb\xe3\xe3\x23", *b"\xf9\x98\x5c\xcd\x6e\x19\xbf\xfa\xeb\x28\xf5\xf1\xab\xea\x25\x8d\xd4\xa7\xb7\x11\x95\x4f\xed\x9b\x0d\xba\x08\x83\xc4\x97\xad\x65", *b"\x76\xb1\x15\x9a\x9a\x76\xac\x13\x7e\xfa\x0f\x3c\xaa\x70\xb5\x9a\x60\x4d\xb9\x5b\x48\xd0\x38\x85\x1d\x4a\xcb\xa9\xe0\x4d\x41\x10", *b"\xbe\xf5\xbb\x3b\x80\x96\x52\x07\x8a\x39\xdc\xf4\xc4\x36\x34\xd5\x2f\xb3\xf4\x81\xde\x92\x7f\x7b\xcc\x0c\xc6\x82\x10\xd0\xdb\xa4", *b"\x91\x8b\x9a\x12\x12\x04\x84\x7a\x08\x03\x88\x0d\x59\x0a\xe1\x79\xd8\x21\x28\x96\xa4\x93\x3e\x66\xc3\xf0\x0d\x7f\x63\xc7\xc0\x0b", *b"\xa8\x0b\x46\xe0\x44\x86\xe1\xa4\xe8\x39\x61\x73\x69\x59\xed\xf4\x63\xfd\x1a\x55\x9b\x12\xf3\xa1\xab\x4c\x89\x3b\xb9\x31\x8b\xbf", *b"\x9f\xc1\x32\x3f\xcf\x6b\x96\xd8\x26\xbe\x3e\x73\x30\xed\x53\x10\xe7\x9d\x35\x72\x1e\x67\xe6\x77\x4f\x6b\xa8\xd3\xdf\xd7\x13\x66", *b"\x6a\x6b\x9c\xfa\xcd\x49\xb5\xd4\x17\x9f\xaa\xe0\x25\x9a\xf4\xa4\x46\x39\x56\xff\x6f\xdf\x5f\x6f\xa5\xa2\x52\x98\xe4\x1b\x64\x05", *b"\x15\xd2\x00\x5b\x94\xa4\xa0\xa7\x93\xce\x66\x89\xc3\xa6\xa6\x5e\x5c\xa1\x51\x4e\x3b\xf2\xbd\x9d\xc5\x27\x3c\x34\xaa\xc6\x7b\xfc", *b"\x85\xeb\xa0\x53\xce\xdd\xfc\x1e\x2d\x16\x09\xb0\x3d\xd1\x1f\x3c\xf0\x71\x2d\xa2\xbd\x1e\xb6\x1a\x27\x0c\xe1\x34\x42\x49\x8c\x41", *b"\xd1\xaf\xae\x46\x0e\xb6\xc3\x7e\xb7\x83\x45\xab\xc5\x2e\xc4\x3b\x16\x52\x84\x0f\xbf\x4f\x79\x0c\x2b\xa7\x98\xc5\x4d\x0b\x17\xff", *b"\x0e\xb6\x4e\xf4\x49\x54\xa9\x5d\x51\xf1\xf4\x5e\xf7\x56\xe1\x84\x7b\xaa\x25\x3b\xa4\xa0\xe4\xf6\x5f\xc5\x55\x12\x57\x1f\x30\x27", *b"\xc6\x43\x9d\x8c\x49\x8c\x45\xb9\x05\x14\xe3\x07\xdf\x1f\xa9\x9f\x41\xf2\xd0\xe1\xde\x90\xa8\x52\xa0\x55\x40\xa1\x94\xf7\xad\xd3", *b"\x8f\x8a\xc3\x9c\x0d\xbc\xf7\xd6\x65\xda\x26\x0a\x82\x6d\x88\xe0\x88\xd8\xba\x73\x81\xce\x5d\x1e\xa6\xdd\x88\x94\x56\x89\x17\xa6", *b"\x97\xe1\xbf\xc8\x8f\x16\x99\xce\x00\x44\x9c\x5a\x5f\x69\xb0\xfd\xf6\xc9\x94\x10\x73\x53\x10\xf2\xb6\x22\x2a\x6f\x25\x67\xbe\xc2", *b"\x80\x02\x68\x53\x7b\x24\xe0\x19\x7b\x10\x9c\x47\x77\xab\x0f\x4c\x09\x28\x5d\xa5\x54\xd1\x25\x65\x97\x4d\x45\xd2\xa0\x71\xec\x2d", *b"\x55\xbf\x83\x5b\xa6\xba\xb1\x43\x1b\x18\x23\xd9\x64\x93\x66\x0a\x92\x06\xfe\xa1\xec\xb5\xce\x95\x9f\x9f\x7d\xc4\x92\x49\x84\xfd", *b"\xdb\x6a\xcd\x67\x7a\x11\x65\x38\x56\x65\x89\xfd\x1b\x5f\x86\x19\x1f\xb9\x38\x94\x98\x5d\x6b\x30\x82\xce\xae\x3e\x53\x4c\xbb\x95", *b"\xd1\x83\xc3\xd4\xb0\x65\xf0\x77\xad\x50\x65\x24\xec\x07\x48\x1d\xe4\x9d\x2c\x6e\x94\xc9\x1d\x58\x43\x7a\x93\xaf\xff\x6e\xd2\xbd", *b"\x07\x17\x08\x3d\xcb\xb2\x4e\x38\x96\x1e\x68\x84\x39\xba\x3a\x40\xc2\x6a\x72\xcf\xf8\x2d\x56\x18\x44\x42\x9d\xa8\x08\x67\xfc\x10", *b"\xed\x8d\xdb\xbf\x6e\x1a\xd0\x0a\x34\x61\xac\xab\x32\x3f\x2a\x70\x51\xf7\x39\xb5\x63\xbb\xac\x83\xba\x56\x73\xc8\x4a\x60\x09\x24", *b"\x46\x63\x1d\x73\x51\xf6\xe2\x57\xa5\x6a\xbb\xb5\x88\x76\x3c\xeb\x89\xbe\xc5\x1b\x95\x9c\xc5\x30\x02\x21\xad\x35\x2f\xe5\xae\x51", *b"\x69\xcb\x42\x2a\xd8\x9b\xcc\xcc\x51\x3b\x58\xe9\x21\x61\xee\x81\xb1\x0d\x0a\xc4\xb1\xc7\xa0\x4f\x19\x76\x57\x5e\xd8\x53\x0a\xee", *b"\x9b\x39\x2e\x2e\xd5\x7f\x0f\x82\x04\xbb\x7c\xf6\x88\x39\xd9\xd1\x91\xc8\x06\x71\x36\x1d\x96\xc2\x9d\xdf\xc2\x6c\x18\xe1\x1b\x79", *b"\xe7\xe0\x6b\xfa\x7c\xcd\x85\x9e\x10\xb8\xac\x33\xae\x61\x5a\x29\x98\x2a\x65\xc1\x31\x33\xe2\x7e\xd2\x97\x5e\x9a\x67\xe2\x16\x52", *b"\x5b\x01\x92\x69\x45\xf7\xd6\x35\xf7\xca\x28\xa6\xd7\x1c\xd8\x8d\xac\x15\x6e\x00\xeb\xaa\xe4\x9c\x2f\xe5\x03\x95\x79\xf1\x55\xf1", *b"\x9d\x8f\x23\x51\xb6\xae\x39\xe9\xae\x13\x5c\xcb\x86\xb5\xdd\xc7\x77\xa8\x92\x23\x59\xb8\x1d\xea\x58\x64\x34\x0c\x13\xbe\x95\xba", *b"\xf2\x2f\x2d\x26\xa5\x78\xbe\x8d\xd2\xb1\xf9\xf4\x19\x01\x29\xd6\x87\xc6\xcf\x57\x46\xb1\x3c\x63\x15\x1c\x65\xee\x83\x7e\x16\x56", *b"\xd5\x81\x12\x6b\x6b\xdc\xc4\xc6\x84\x25\x04\xdd\xc9\x8c\xb1\x50\x56\x19\x6e\xbf\x69\x03\x12\x38\x93\x20\x98\xb7\xf2\x5e\x4c\xd6", *b"\x12\x4b\xbc\x2e\x30\x71\x18\xa5\x82\xd5\xae\x52\xfa\xfd\xbf\xcb\x75\x2b\x94\x8e\x9e\xc3\xaf\x56\xa1\x41\xdc\x57\xaf\x40\x50\xdb", *b"\xaa\xaa\x52\xad\xa4\xf5\xde\xd6\x92\x9e\xbf\xb9\x12\xfa\x70\x12\xa8\x19\x6e\x0c\xa4\xdf\x7c\x99\x88\x94\x2c\x3a\x98\x3d\xa6\x4e", ]; #[test] fn golden() { let data = generate(1024 * 64 * 64); let chunks = Chunker::from(&data).map(blake3::hash); for (actual, &expected) in chunks.zip(GOLDEN.iter()) { assert_eq!(actual, blake3::Hash::from(expected)); } } #[test] fn max_chunk() { // all-zero byte sequences don't contain any split points, // so the chunker is forced to limit the chunk size assert_eq!( Chunker::from(&[0u8; MAX_CHUNK_SIZE + 1]) .next() .unwrap() .len(), MAX_CHUNK_SIZE ); } #[test] fn min_chunk() { let data = generate(1024 * 32); // a "tail" is a window-sized byte sequence that terminates a chunk // we extract one from our test vectors, since every chunk smaller // than MAX_CHUNK_SIZE ends in a tail let mut tail = [0; WINDOW_SIZE]; tail.copy_from_slice({ let chunk = Chunker::from(&data).next().unwrap(); chunk.rchunks_exact(WINDOW_SIZE).next().unwrap() }); let mut data = vec![0; MIN_CHUNK_SIZE - WINDOW_SIZE]; data.extend(tail); data.push(0); assert_eq!(Chunker::from(&data).next().unwrap().len(), MIN_CHUNK_SIZE); } #[test] fn short_chunk() { assert_eq!( Chunker::from(&[0u8; MIN_CHUNK_SIZE / 2]) .next() .unwrap() .len(), MIN_CHUNK_SIZE / 2 ); } #[test] fn size_hint() { assert_eq!(Chunker::from(&[]).size_hint(), (0, Some(0))); assert_eq!(Chunker::from(&[0]).size_hint(), (1, Some(1))); assert_eq!( Chunker::from(&[0; MAX_CHUNK_SIZE]).size_hint(), (1, Some(MAX_CHUNK_SIZE / MIN_CHUNK_SIZE)) ); assert_eq!( Chunker::from(&[0; MAX_CHUNK_SIZE + 1]).size_hint(), (2, Some(MAX_CHUNK_SIZE / MIN_CHUNK_SIZE + 1)) ); } #[test] fn discriminator() { assert_eq!( DISCRIMINATOR, discriminator_from_average(AVG_CHUNK_SIZE as u64) ); } }