From db7c54f92f386a94db8af7a12626d2657b4dd640 Mon Sep 17 00:00:00 2001 From: edef Date: Sat, 14 Aug 2021 21:28:14 +0000 Subject: ripple/fossil: a basic content-addressable store Fossil stores content-addressed blobs of file contents and Protobuf-encoded directory listings, backed by Sled. Change-Id: I8b49de6342218ca00755cec980b1d0cfb18878a7 --- ripple/fossil/src/lib.rs | 206 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 ripple/fossil/src/lib.rs (limited to 'ripple/fossil/src/lib.rs') diff --git a/ripple/fossil/src/lib.rs b/ripple/fossil/src/lib.rs new file mode 100644 index 0000000..6fb5269 --- /dev/null +++ b/ripple/fossil/src/lib.rs @@ -0,0 +1,206 @@ +// SPDX-FileCopyrightText: edef +// SPDX-License-Identifier: OSL-3.0 + +use { + byteorder::{BigEndian, ByteOrder}, + prost::Message, + std::{collections::BTreeMap, fs, io, os::unix::fs::PermissionsExt, path::Path}, +}; + +pub mod store { + include!(concat!(env!("OUT_DIR"), "/fossil.store.rs")); +} + +const DIGEST_BYTES: usize = blake3::OUT_LEN; +const OFFSET_BYTES: usize = 4; + +pub struct Store { + db: sled::Db, +} + +impl Store { + pub fn open>(path: P) -> io::Result { + let db = sled::open(path)?; + Ok(Store { db }) + } + + pub fn add_path>(&self, path: P) -> Node { + let path = path.as_ref(); + let meta = fs::symlink_metadata(path).unwrap(); + + match meta.file_type() { + ty if ty.is_dir() => { + let mut d = Directory::new(); + + for entry in path.read_dir().unwrap() { + let entry = entry.unwrap(); + let name = entry.file_name().into_string().unwrap(); + d.children.insert(name, self.add_path(entry.path())); + } + + let blob = d.into_pb().encode_to_vec(); + + Node::Directory { + r#ref: self.write_blob(&blob), + } + } + ty if ty.is_file() => { + let executable = (meta.permissions().mode() & 0o100) != 0; + + let blob = fs::read(path).unwrap(); + Node::File { + executable, + r#ref: self.write_blob(&blob), + } + } + ty if ty.is_symlink() => { + let target = path + .read_link() + .unwrap() + .to_str() + .expect("symlink target is invalid UTF-8") + .to_owned(); + + Node::Link { target } + } + _ => panic!("not a symlink or a regular file"), + } + } + + fn write_blob(&self, data: &[u8]) -> Digest { + let digest = { + let mut h = blake3::Hasher::new(); + h.update_with_join::(&data); + *h.finalize().as_bytes() + }; + + // TODO(edef): maybe don't use the default tree? + // we should probably have a "blob" tree, + // and reserve the default tree for DB metadata + + self.db + .transaction::<_, _, sled::Error>(|db| { + for (n, chunk) in data.chunks(4096).enumerate() { + let mut key = [0u8; DIGEST_BYTES + OFFSET_BYTES]; + key[..DIGEST_BYTES].copy_from_slice(&digest); + BigEndian::write_u32(&mut key[DIGEST_BYTES..], n as u32); + db.insert(&key[..], chunk)?; + } + Ok(()) + }) + .unwrap(); + + digest.into() + } + + pub fn read_blob(&self, r#ref: Digest) -> Vec { + let mut buffer = Vec::new(); + let mut h = blake3::Hasher::new(); + for element in self.db.scan_prefix(r#ref.as_bytes()) { + let (_, chunk) = element.unwrap(); + h.update(&chunk); + buffer.extend_from_slice(&chunk); + } + + if buffer.len() == 0 { + panic!("blob not found"); + } + + if h.finalize() != r#ref { + panic!("hash mismatch"); + } + + buffer + } +} + +pub type Digest = blake3::Hash; + +pub struct Directory { + pub children: BTreeMap, +} + +#[derive(Clone)] +pub enum Node { + Directory { r#ref: Digest }, + File { r#ref: Digest, executable: bool }, + Link { target: String }, +} + +impl Directory { + pub fn new() -> Directory { + Directory { + children: BTreeMap::new(), + } + } + + pub fn into_pb(self) -> store::Directory { + let mut d = store::Directory::default(); + + for (name, node) in self.children.into_iter() { + match node { + Node::Directory { r#ref } => d.directories.push(store::DirectoryNode { + name, + r#ref: r#ref.as_bytes().to_vec(), + }), + Node::File { r#ref, executable } => d.files.push(store::FileNode { + name, + r#ref: r#ref.as_bytes().to_vec(), + executable, + }), + Node::Link { target } => d.links.push(store::LinkNode { name, target }), + } + } + + d + } + + pub fn from_pb(pb: store::Directory) -> Directory { + let mut children = BTreeMap::new(); + + for child in pb.directories { + children.insert( + child.name, + Node::Directory { + r#ref: digest_from_bytes(&child.r#ref), + }, + ); + } + + for child in pb.files { + children.insert( + child.name, + Node::File { + r#ref: digest_from_bytes(&child.r#ref), + executable: child.executable, + }, + ); + } + + for child in pb.links { + children.insert( + child.name, + Node::Link { + target: child.target, + }, + ); + } + + Directory { children } + } +} + +#[track_caller] +fn digest_from_bytes(bytes: &[u8]) -> Digest { + if bytes.len() != DIGEST_BYTES { + panic!( + "digest is {} bytes, expecting {} bytes", + bytes.len(), + DIGEST_BYTES + ); + } + + let mut buffer = [0; DIGEST_BYTES]; + buffer.copy_from_slice(bytes); + buffer.into() +} -- cgit 1.4.1