// SPDX-FileCopyrightText: edef
// SPDX-License-Identifier: OSL-3.0
use {
    byteorder::{BigEndian, ByteOrder},
    prost::Message,
    std::{collections::BTreeMap, fs, io, os::unix::fs::PermissionsExt, path::Path},
};

pub mod store {
    include!(concat!(env!("OUT_DIR"), "/fossil.store.rs"));
}

const DIGEST_BYTES: usize = blake3::OUT_LEN;
const OFFSET_BYTES: usize = 4;

pub struct Store {
    db: sled::Db,
}

impl Store {
    pub fn open(path: impl AsRef<Path>) -> io::Result<Store> {
        let db = sled::open(path)?;
        Ok(Store { db })
    }

    pub fn add_path(&self, path: impl AsRef<Path>) -> Node {
        let path = path.as_ref();
        let meta = fs::symlink_metadata(path).unwrap();

        match meta.file_type() {
            ty if ty.is_dir() => {
                // Recurse into the directory, then store its protobuf
                // encoding as a blob of its own.
                let mut d = Directory::new();
                for entry in path.read_dir().unwrap() {
                    let entry = entry.unwrap();
                    let name = entry.file_name().into_string().unwrap();
                    d.children.insert(name, self.add_path(entry.path()));
                }

                let blob = d.into_pb().encode_to_vec();
                Node::Directory {
                    r#ref: self.write_blob(&blob),
                }
            }
            ty if ty.is_file() => {
                // A file counts as executable if its owner-execute bit is set.
                let executable = (meta.permissions().mode() & 0o100) != 0;
                let blob = fs::read(path).unwrap();
                Node::File {
                    executable,
                    r#ref: self.write_blob(&blob),
                }
            }
            ty if ty.is_symlink() => {
                let target = path
                    .read_link()
                    .unwrap()
                    .to_str()
                    .expect("symlink target is invalid UTF-8")
                    .to_owned();
                Node::Link { target }
            }
            _ => panic!("not a directory, symlink, or regular file"),
        }
    }

    fn write_blob(&self, data: &[u8]) -> Digest {
        let digest = {
            let mut h = blake3::Hasher::new();
            // RayonJoin hashes large inputs in parallel
            // (requires the blake3 crate's "rayon" feature).
            h.update_with_join::<blake3::join::RayonJoin>(data);
            *h.finalize().as_bytes()
        };

        // TODO(edef): maybe don't use the default tree?
        // we should probably have a "blob" tree,
        // and reserve the default tree for DB metadata
        self.db
            .transaction::<_, _, sled::Error>(|db| {
                // Each chunk is keyed by digest || big-endian chunk index,
                // so the chunks of a blob sort contiguously and in order.
                for (n, chunk) in data.chunks(4096).enumerate() {
                    let mut key = [0u8; DIGEST_BYTES + OFFSET_BYTES];
                    key[..DIGEST_BYTES].copy_from_slice(&digest);
                    BigEndian::write_u32(&mut key[DIGEST_BYTES..], n as u32);
                    db.insert(&key[..], chunk)?;
                }
                Ok(())
            })
            .unwrap();

        digest.into()
    }

    pub fn read_blob(&self, r#ref: Digest) -> Vec<u8> {
        let mut buffer = Vec::new();
        let mut h = blake3::Hasher::new();

        // All chunk keys share the digest as their prefix, and sled yields
        // them in key order, i.e. in ascending chunk index.
        for element in self.db.scan_prefix(r#ref.as_bytes()) {
            let (_, chunk) = element.unwrap();
            h.update(&chunk);
            buffer.extend_from_slice(&chunk);
        }

        if buffer.is_empty() {
            panic!("blob not found");
        }

        if h.finalize() != r#ref {
            panic!("hash mismatch");
        }

        buffer
    }
}

pub type Digest = blake3::Hash;

pub struct Directory {
    pub children: BTreeMap<String, Node>,
}

#[derive(Clone)]
pub enum Node {
    Directory { r#ref: Digest },
    File { r#ref: Digest, executable: bool },
    Link { target: String },
}

impl Directory {
    pub fn new() -> Directory {
        Directory {
            children: BTreeMap::new(),
        }
    }

    pub fn into_pb(self) -> store::Directory {
        let mut d = store::Directory::default();

        for (name, node) in self.children.into_iter() {
            match node {
                Node::Directory { r#ref } => d.directories.push(store::DirectoryNode {
                    name,
                    r#ref: r#ref.as_bytes().to_vec(),
                }),
                Node::File { r#ref, executable } => d.files.push(store::FileNode {
                    name,
                    r#ref: r#ref.as_bytes().to_vec(),
                    executable,
                }),
                Node::Link { target } => d.links.push(store::LinkNode { name, target }),
            }
        }

        d
    }

    pub fn from_pb(pb: store::Directory) -> Directory {
        let mut children = BTreeMap::new();

        for child in pb.directories {
            children.insert(
                child.name,
                Node::Directory {
                    r#ref: digest_from_bytes(&child.r#ref),
                },
            );
        }

        for child in pb.files {
            children.insert(
                child.name,
                Node::File {
                    r#ref: digest_from_bytes(&child.r#ref),
                    executable: child.executable,
                },
            );
        }

        for child in pb.links {
            children.insert(
                child.name,
                Node::Link {
                    target: child.target,
                },
            );
        }

        Directory { children }
    }
}

#[track_caller]
fn digest_from_bytes(bytes: &[u8]) -> Digest {
    if bytes.len() != DIGEST_BYTES {
        panic!(
            "digest is {} bytes, expecting {} bytes",
            bytes.len(),
            DIGEST_BYTES
        );
    }

    let mut buffer = [0; DIGEST_BYTES];
    buffer.copy_from_slice(bytes);
    buffer.into()
}
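
// A minimal usage sketch (not part of the original source): round-trip a
// small blob through the store. The database path below is a hypothetical
// placeholder; any location that sled can create a database in will do.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn blob_round_trip() {
        let store = Store::open("/tmp/fossil-test-db").unwrap();

        // write_blob splits the data into 4096-byte chunks keyed by
        // digest || big-endian chunk index and returns the BLAKE3 digest;
        // read_blob reassembles the chunks and verifies the hash.
        let data = b"hello, fossil".to_vec();
        let digest = store.write_blob(&data);
        assert_eq!(store.read_blob(digest), data);
    }
}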