diff --git a/Cargo.lock b/Cargo.lock index a0aa22a..bb5910a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -154,6 +154,7 @@ dependencies = [ "anyhow", "heed", "itertools", + "persy", "rocksdb", "rusqlite", "sled", @@ -179,6 +180,21 @@ dependencies = [ "conduit_iface", ] +[[package]] +name = "crc" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49fc9a695bca7f35f5f4c15cddc84415f66a74ea78eef08e90c5024f2b540e23" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccaeedb56da03b09f598226e25e80088cb4cd25f316e6e4df7d695f0feeb1403" + [[package]] name = "crc32fast" version = "1.3.1" @@ -230,6 +246,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "data-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57" + [[package]] name = "either" version = "1.6.1" @@ -452,6 +474,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linked-hash-map" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" + [[package]] name = "lmdb-rkv-sys" version = "0.11.0" @@ -519,6 +547,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.9.0" @@ -572,12 +609,34 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" +[[package]] +name = "persy" +version = "1.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5af61053f1daed3ff0265fad7f924e43ce07642a336c79304f8e5aec205460fb" +dependencies = [ + "crc", + "data-encoding", + "fs2", + "linked-hash-map", + "rand", + "thiserror", + "unsigned-varint", + "zigzag", +] + [[package]] name = "pkg-config" version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe" +[[package]] +name = "ppv-lite86" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" + [[package]] name = "proc-macro2" version = "1.0.36" @@ -596,6 +655,36 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom", +] + [[package]] name = "redox_syscall" version = "0.2.10" @@ -820,6 +909,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +[[package]] +name = "unsigned-varint" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d86a8dc7f45e4c1b0d30e43038c38f274e77af056aa5f74b93c2cf9eb3c1c836" + [[package]] name = "url" version = "2.2.2" @@ -878,6 +973,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "zigzag" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70b40401a28d86ce16a330b863b86fd7dbee4d7c940587ab09ab8c019f9e3fdf" +dependencies = [ + "num-traits", +] + [[package]] name = "zstd" version = "0.9.2+zstd.1.5.1" diff --git a/tools/iface/Cargo.toml b/tools/iface/Cargo.toml index 42f2356..6f302df 100644 --- a/tools/iface/Cargo.toml +++ b/tools/iface/Cargo.toml @@ -7,9 +7,16 @@ edition = "2018" [dependencies] itertools = "0.10.1" -sled = { version = "0.34.6", features = ["compression", "no_metrics"] } -rusqlite = { version = "0.25.3", features = ["bundled"] } -anyhow = "1.0.42" -heed = { git = "https://github.com/timokoesters/heed.git", rev = "f6f825da7fb2c758867e05ad973ef800a6fe1d5d" } thiserror = "1.0.26" -rocksdb = { version = "0.17.0", features = ["multi-threaded-cf", "zstd"] } +anyhow = "1.0.42" + +sled = { version = "0.34.6", features = ["compression", "no_metrics"], optional = true } +rusqlite = { version = "0.25.3", features = ["bundled"], optional = true } +heed = { git = "https://github.com/timokoesters/heed.git", rev = "f6f825da7fb2c758867e05ad973ef800a6fe1d5d", optional = true } +rocksdb = { version = "0.17.0", features = ["multi-threaded-cf", "zstd"], optional = true } +persy = { version = "1.2", optional = true } + +[features] +default = ["sled", "persy", "heed", "sqlite", "rocksdb"] + +sqlite = ["rusqlite"] \ No newline at end of file diff --git a/tools/iface/src/db.rs b/tools/iface/src/db.rs index 50b2720..e695f10 100644 --- a/tools/iface/src/db.rs +++ b/tools/iface/src/db.rs @@ -1,6 +1,12 @@ +#[cfg(feature = "heed")] pub mod heed; +#[cfg(feature = "persy")] +pub mod persy; +#[cfg(feature = "rocksdb")] pub mod rocksdb; +#[cfg(feature = "sled")] pub mod sled; +#[cfg(feature = "sqlite")] pub mod sqlite; use itertools::Itertools; diff --git a/tools/iface/src/db/persy.rs b/tools/iface/src/db/persy.rs new file mode 100644 index 0000000..fe603bb --- /dev/null +++ b/tools/iface/src/db/persy.rs @@ -0,0 +1,98 @@ +use super::{Database, KVIter, Segment, SegmentIter}; +use persy::{ByteVec, Persy}; +use std::path::Path; + +pub fn new_db<P: AsRef<Path>>(path: P) -> anyhow::Result<PersyDB> { + let path = Path::new("./db.persy").join(path); + + let persy = persy::OpenOptions::new() + .create(true) + .config(persy::Config::new()) + .open(&path)?; + + Ok(PersyDB { persy }) +} + +pub struct PersyDB { + persy: Persy, +} + +impl Database for PersyDB { + fn names<'a>(&'a self) -> Vec<Vec<u8>> { + self.persy + .list_indexes() + .unwrap() + .iter() + .map(|(s, _)| s.as_bytes().to_vec()) + .collect() + } + + fn segment<'a>(&'a mut self, name: Vec<u8>) -> Option<Box<dyn Segment + 'a>> { + let string = String::from_utf8(name).unwrap(); + + if !self.persy.exists_index(&string).unwrap() { + use persy::ValueMode; + + let mut tx = self.persy.begin().unwrap(); + tx.create_index::<ByteVec, ByteVec>(&string, ValueMode::Replace) + .unwrap(); + tx.prepare().unwrap().commit().unwrap(); + } + + Some(Box::new(PersySeg { + db: self, + name: string, + })) + } + + fn flush(&mut self) { + // NOOP + } +} + +pub struct PersySeg<'a> { + db: &'a mut PersyDB, + name: String, +} + +impl<'r> Segment for PersySeg<'r> { + fn batch_insert<'a>( + &'a mut self, + batch: Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + 'a>, + ) -> anyhow::Result<()> { + let mut tx = self.db.persy.begin()?; + for (key, value) in batch { + tx.put::<ByteVec, ByteVec>( + &self.name, + ByteVec::from(key.clone()), + ByteVec::from(value), + )?; + } + tx.prepare()?.commit()?; + + Ok(()) + } + + fn get_iter<'a>(&'a mut self) -> Box<dyn SegmentIter + 'a> { + Box::new(PersySegIter(self, &self.name)) + } +} + +pub struct PersySegIter<'a>(&'a PersySeg<'a>, &'a str); + +impl<'r> SegmentIter for PersySegIter<'r> { + fn iter<'a>(&'a mut self) -> KVIter<'a> { + Box::new( + self.0 + .db + .persy + .range::<ByteVec, ByteVec, _>(self.1, ..) + .unwrap() + .filter_map(|(k, v)| { + v.into_iter() + .map(|val| ((*k).to_owned().into(), (*val).to_owned().into())) + .next() + }), + ) + } +} diff --git a/tools/migrate/Cargo.toml b/tools/migrate/Cargo.toml index 70086b6..fa4fe8d 100644 --- a/tools/migrate/Cargo.toml +++ b/tools/migrate/Cargo.toml @@ -8,5 +8,14 @@ edition = "2018" [dependencies] clap = "2.33.3" anyhow = "1.0.41" -conduit_iface = { path = "../iface/" } +conduit_iface = { path = "../iface/", default-features = false } thiserror = "1.0.26" + +[features] +default = ["sled", "sqlite", "rocksdb"] + +sled = ["conduit_iface/sled"] +persy = ["conduit_iface/persy"] +heed = ["conduit_iface/heed"] +sqlite = ["conduit_iface/sqlite"] +rocksdb = ["conduit_iface/rocksdb"] \ No newline at end of file diff --git a/tools/migrate/src/main.rs b/tools/migrate/src/main.rs index 41f221d..9d1de77 100644 --- a/tools/migrate/src/main.rs +++ b/tools/migrate/src/main.rs @@ -1,26 +1,36 @@ use clap::{App, Arg}; -use conduit_iface::db::{ - self, copy_database, heed::HeedDB, rocksdb::RocksDB, sled::SledDB, sqlite::SqliteDB, -}; +use conduit_iface::db::{self, copy_database}; use std::{ ops::{Deref, DerefMut}, path::{Path, PathBuf}, }; enum Database { - Sled(SledDB), - Sqlite(SqliteDB), - Heed(HeedDB), - Rocks(RocksDB), + #[cfg(feature = "sled")] + Sled(db::sled::SledDB), + #[cfg(feature = "heed")] + Heed(db::heed::HeedDB), + #[cfg(feature = "sqlite")] + Sqlite(db::sqlite::SqliteDB), + #[cfg(feature = "rocksdb")] + Rocks(db::rocksdb::RocksDB), + #[cfg(feature = "persy")] + Persy(db::persy::PersyDB), } impl Database { fn new(name: &str, path: PathBuf) -> anyhow::Result<Self> { Ok(match name { - "sled" => Self::Sled(SledDB::new(db::sled::new_db(path)?)), - "heed" => Self::Heed(HeedDB::new(db::heed::new_db(path)?)), - "sqlite" => Self::Sqlite(SqliteDB::new(db::sqlite::new_conn(path)?)), + #[cfg(feature = "sled")] + "sled" => Self::Sled(db::sled::SledDB::new(db::sled::new_db(path)?)), + #[cfg(feature = "heed")] + "heed" => Self::Heed(db::heed::HeedDB::new(db::heed::new_db(path)?)), + #[cfg(feature = "sqlite")] + "sqlite" => Self::Sqlite(db::sqlite::SqliteDB::new(db::sqlite::new_conn(path)?)), + #[cfg(feature = "rocksdb")] "rocks" => Self::Rocks(db::rocksdb::new_conn(path)?), + #[cfg(feature = "persy")] + "persy" => Self::Persy(db::persy::new_db(path)?), _ => panic!("unknown database type: {}", name), }) } @@ -31,10 +41,16 @@ impl Deref for Database { fn deref(&self) -> &Self::Target { match self { + #[cfg(feature = "sled")] Database::Sled(db) => db, - Database::Sqlite(db) => db, + #[cfg(feature = "heed")] Database::Heed(db) => db, + #[cfg(feature = "sqlite")] + Database::Sqlite(db) => db, + #[cfg(feature = "rocksdb")] Database::Rocks(db) => db, + #[cfg(feature = "persy")] + Database::Persy(db) => db, } } } @@ -42,15 +58,32 @@ impl Deref for Database { impl DerefMut for Database { fn deref_mut(&mut self) -> &mut Self::Target { match self { + #[cfg(feature = "sled")] Database::Sled(db) => db, - Database::Sqlite(db) => db, + #[cfg(feature = "heed")] Database::Heed(db) => db, + #[cfg(feature = "sqlite")] + Database::Sqlite(db) => db, + #[cfg(feature = "rocksdb")] Database::Rocks(db) => db, + #[cfg(feature = "persy")] + Database::Persy(db) => db, } } } -const DATABASES: &[&str] = &["heed", "sqlite", "sled", "rocks"]; +const DATABASES: &[&str] = &[ + #[cfg(feature = "sled")] + "sled", + #[cfg(feature = "heed")] + "heed", + #[cfg(feature = "sqlite")] + "sqlite", + #[cfg(feature = "rocksdb")] + "rocks", + #[cfg(feature = "persy")] + "persy", +]; fn main() -> anyhow::Result<()> { let matches = App::new("Conduit Sled to Sqlite Migrator")