add persy backend and make building optional

This commit is contained in:
Jonathan de Jong 2022-03-19 16:50:54 +01:00
parent 0e0a025c37
commit 805baa0705
6 changed files with 276 additions and 19 deletions

104
Cargo.lock generated
View file

@ -154,6 +154,7 @@ dependencies = [
"anyhow", "anyhow",
"heed", "heed",
"itertools", "itertools",
"persy",
"rocksdb", "rocksdb",
"rusqlite", "rusqlite",
"sled", "sled",
@ -179,6 +180,21 @@ dependencies = [
"conduit_iface", "conduit_iface",
] ]
[[package]]
name = "crc"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49fc9a695bca7f35f5f4c15cddc84415f66a74ea78eef08e90c5024f2b540e23"
dependencies = [
"crc-catalog",
]
[[package]]
name = "crc-catalog"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccaeedb56da03b09f598226e25e80088cb4cd25f316e6e4df7d695f0feeb1403"
[[package]] [[package]]
name = "crc32fast" name = "crc32fast"
version = "1.3.1" version = "1.3.1"
@ -230,6 +246,12 @@ dependencies = [
"lazy_static", "lazy_static",
] ]
[[package]]
name = "data-encoding"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57"
[[package]] [[package]]
name = "either" name = "either"
version = "1.6.1" version = "1.6.1"
@ -452,6 +474,12 @@ dependencies = [
"vcpkg", "vcpkg",
] ]
[[package]]
name = "linked-hash-map"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3"
[[package]] [[package]]
name = "lmdb-rkv-sys" name = "lmdb-rkv-sys"
version = "0.11.0" version = "0.11.0"
@ -519,6 +547,15 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
"autocfg",
]
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.9.0" version = "1.9.0"
@ -572,12 +609,34 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
[[package]]
name = "persy"
version = "1.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5af61053f1daed3ff0265fad7f924e43ce07642a336c79304f8e5aec205460fb"
dependencies = [
"crc",
"data-encoding",
"fs2",
"linked-hash-map",
"rand",
"thiserror",
"unsigned-varint",
"zigzag",
]
[[package]] [[package]]
name = "pkg-config" name = "pkg-config"
version = "0.3.24" version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe" checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe"
[[package]]
name = "ppv-lite86"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.36" version = "1.0.36"
@ -596,6 +655,36 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
dependencies = [
"getrandom",
]
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.2.10" version = "0.2.10"
@ -820,6 +909,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "unsigned-varint"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d86a8dc7f45e4c1b0d30e43038c38f274e77af056aa5f74b93c2cf9eb3c1c836"
[[package]] [[package]]
name = "url" name = "url"
version = "2.2.2" version = "2.2.2"
@ -878,6 +973,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "zigzag"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70b40401a28d86ce16a330b863b86fd7dbee4d7c940587ab09ab8c019f9e3fdf"
dependencies = [
"num-traits",
]
[[package]] [[package]]
name = "zstd" name = "zstd"
version = "0.9.2+zstd.1.5.1" version = "0.9.2+zstd.1.5.1"

View file

@ -7,9 +7,16 @@ edition = "2018"
[dependencies] [dependencies]
itertools = "0.10.1" itertools = "0.10.1"
sled = { version = "0.34.6", features = ["compression", "no_metrics"] }
rusqlite = { version = "0.25.3", features = ["bundled"] }
anyhow = "1.0.42"
heed = { git = "https://github.com/timokoesters/heed.git", rev = "f6f825da7fb2c758867e05ad973ef800a6fe1d5d" }
thiserror = "1.0.26" thiserror = "1.0.26"
rocksdb = { version = "0.17.0", features = ["multi-threaded-cf", "zstd"] } anyhow = "1.0.42"
sled = { version = "0.34.6", features = ["compression", "no_metrics"], optional = true }
rusqlite = { version = "0.25.3", features = ["bundled"], optional = true }
heed = { git = "https://github.com/timokoesters/heed.git", rev = "f6f825da7fb2c758867e05ad973ef800a6fe1d5d", optional = true }
rocksdb = { version = "0.17.0", features = ["multi-threaded-cf", "zstd"], optional = true }
persy = { version = "1.2", optional = true }
[features]
default = ["sled", "persy", "heed", "sqlite", "rocksdb"]
sqlite = ["rusqlite"]

View file

@ -1,6 +1,12 @@
#[cfg(feature = "heed")]
pub mod heed; pub mod heed;
#[cfg(feature = "persy")]
pub mod persy;
#[cfg(feature = "rocksdb")]
pub mod rocksdb; pub mod rocksdb;
#[cfg(feature = "sled")]
pub mod sled; pub mod sled;
#[cfg(feature = "sqlite")]
pub mod sqlite; pub mod sqlite;
use itertools::Itertools; use itertools::Itertools;

View file

@ -0,0 +1,98 @@
use super::{Database, KVIter, Segment, SegmentIter};
use persy::{ByteVec, Persy};
use std::path::Path;
/// Opens the Persy store belonging to the database directory `path`,
/// creating the store file if it does not exist yet.
///
/// The store is a single file named `db.persy` located directly inside
/// `path`.
///
/// NOTE(review): this assumes callers pass the database *directory*, not the
/// store file itself — confirm against the call sites.
///
/// # Errors
///
/// Returns an error if Persy cannot create or open the store file.
pub fn new_db<P: AsRef<Path>>(path: P) -> anyhow::Result<PersyDB> {
    // Append the file name to the caller's directory. Joining the other way
    // round (`"./db.persy".join(path)`) resolves relative to the working
    // directory, and because `Path::join` lets an absolute argument replace
    // the base it would try to open the directory itself for absolute paths.
    let path = path.as_ref().join("db.persy");
    let persy = persy::OpenOptions::new()
        .create(true)
        .config(persy::Config::new())
        .open(&path)?;

    Ok(PersyDB { persy })
}
/// [`Database`] implementation that stores its segments in a Persy store.
pub struct PersyDB {
/// Persy handle used for all index and transaction operations.
persy: Persy,
}
impl Database for PersyDB {
    /// Returns the name of every Persy index in the store, as raw bytes.
    ///
    /// # Panics
    ///
    /// Panics if Persy fails to list its indexes.
    fn names<'a>(&'a self) -> Vec<Vec<u8>> {
        let indexes = self.persy.list_indexes().unwrap();

        let mut names = Vec::new();
        for (index_name, _) in indexes.iter() {
            names.push(index_name.as_bytes().to_vec());
        }
        names
    }

    /// Returns the segment called `name`, creating the backing Persy index
    /// (byte keys, byte values, replace-on-write) when it does not exist yet.
    ///
    /// This implementation always returns `Some`.
    ///
    /// # Panics
    ///
    /// Panics if `name` is not valid UTF-8, or if any Persy call made while
    /// checking for or creating the index fails.
    fn segment<'a>(&'a mut self, name: Vec<u8>) -> Option<Box<dyn Segment + 'a>> {
        use persy::ValueMode;

        // Persy index names are strings, so the raw segment name must decode.
        let index_name = String::from_utf8(name).unwrap();

        let already_present = self.persy.exists_index(&index_name).unwrap();
        if !already_present {
            // Index creation runs in its own transaction, committed right away.
            let mut create_tx = self.persy.begin().unwrap();
            create_tx
                .create_index::<ByteVec, ByteVec>(&index_name, ValueMode::Replace)
                .unwrap();
            create_tx.prepare().unwrap().commit().unwrap();
        }

        let segment = PersySeg {
            db: self,
            name: index_name,
        };
        Some(Box::new(segment))
    }

    /// Intentionally does nothing: the writes issued by this backend each run
    /// in a transaction that is committed before the call returns.
    fn flush(&mut self) {
        // NOOP
    }
}
/// One named segment of a [`PersyDB`]; the segment name is also the name of
/// the Persy index that holds its key/value pairs.
pub struct PersySeg<'a> {
/// Database this segment belongs to.
db: &'a mut PersyDB,
/// Name of the Persy index backing this segment.
name: String,
}
impl<'r> Segment for PersySeg<'r> {
    /// Writes every `(key, value)` pair from `batch` into this segment's
    /// index inside a single transaction, committed after the last pair.
    ///
    /// # Errors
    ///
    /// Returns the first error raised while beginning the transaction,
    /// putting a pair, or preparing/committing. When a `put` fails the
    /// function returns before the commit is attempted.
    fn batch_insert<'a>(
        &'a mut self,
        batch: Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + 'a>,
    ) -> anyhow::Result<()> {
        let mut tx = self.db.persy.begin()?;

        for (key, value) in batch {
            // Both buffers are owned and not used again, so they are moved
            // into the `ByteVec`s rather than cloned.
            tx.put::<ByteVec, ByteVec>(&self.name, ByteVec::from(key), ByteVec::from(value))?;
        }

        tx.prepare()?.commit()?;
        Ok(())
    }

    /// Returns an iterator factory over this segment's key/value pairs.
    fn get_iter<'a>(&'a mut self) -> Box<dyn SegmentIter + 'a> {
        Box::new(PersySegIter(self, &self.name))
    }
}
/// Iterator factory for a [`PersySeg`]: field `0` is the segment being read,
/// field `1` is the name of the Persy index to scan.
pub struct PersySegIter<'a>(&'a PersySeg<'a>, &'a str);
impl<'r> SegmentIter for PersySegIter<'r> {
    /// Scans the whole index and yields each key together with the first
    /// value Persy reports for it, both copied into owned byte vectors.
    ///
    /// Keys for which Persy reports no value are skipped.
    ///
    /// # Panics
    ///
    /// Panics if Persy cannot start the range scan.
    fn iter<'a>(&'a mut self) -> KVIter<'a> {
        // An unbounded range (`..`) covers every key in the index.
        let entries = self
            .0
            .db
            .persy
            .range::<ByteVec, ByteVec, _>(self.1, ..)
            .unwrap();

        Box::new(entries.filter_map(|(key, values)| {
            // Only the first value per key is surfaced.
            let first = values.into_iter().next()?;
            Some((key.to_vec(), first.to_vec()))
        }))
    }
}

View file

@ -8,5 +8,14 @@ edition = "2018"
[dependencies] [dependencies]
clap = "2.33.3" clap = "2.33.3"
anyhow = "1.0.41" anyhow = "1.0.41"
conduit_iface = { path = "../iface/" } conduit_iface = { path = "../iface/", default-features = false }
thiserror = "1.0.26" thiserror = "1.0.26"
[features]
default = ["sled", "sqlite", "rocksdb"]
sled = ["conduit_iface/sled"]
persy = ["conduit_iface/persy"]
heed = ["conduit_iface/heed"]
sqlite = ["conduit_iface/sqlite"]
rocksdb = ["conduit_iface/rocksdb"]

View file

@ -1,26 +1,36 @@
use clap::{App, Arg}; use clap::{App, Arg};
use conduit_iface::db::{ use conduit_iface::db::{self, copy_database};
self, copy_database, heed::HeedDB, rocksdb::RocksDB, sled::SledDB, sqlite::SqliteDB,
};
use std::{ use std::{
ops::{Deref, DerefMut}, ops::{Deref, DerefMut},
path::{Path, PathBuf}, path::{Path, PathBuf},
}; };
enum Database { enum Database {
Sled(SledDB), #[cfg(feature = "sled")]
Sqlite(SqliteDB), Sled(db::sled::SledDB),
Heed(HeedDB), #[cfg(feature = "heed")]
Rocks(RocksDB), Heed(db::heed::HeedDB),
#[cfg(feature = "sqlite")]
Sqlite(db::sqlite::SqliteDB),
#[cfg(feature = "rocksdb")]
Rocks(db::rocksdb::RocksDB),
#[cfg(feature = "persy")]
Persy(db::persy::PersyDB),
} }
impl Database { impl Database {
fn new(name: &str, path: PathBuf) -> anyhow::Result<Self> { fn new(name: &str, path: PathBuf) -> anyhow::Result<Self> {
Ok(match name { Ok(match name {
"sled" => Self::Sled(SledDB::new(db::sled::new_db(path)?)), #[cfg(feature = "sled")]
"heed" => Self::Heed(HeedDB::new(db::heed::new_db(path)?)), "sled" => Self::Sled(db::sled::SledDB::new(db::sled::new_db(path)?)),
"sqlite" => Self::Sqlite(SqliteDB::new(db::sqlite::new_conn(path)?)), #[cfg(feature = "heed")]
"heed" => Self::Heed(db::heed::HeedDB::new(db::heed::new_db(path)?)),
#[cfg(feature = "sqlite")]
"sqlite" => Self::Sqlite(db::sqlite::SqliteDB::new(db::sqlite::new_conn(path)?)),
#[cfg(feature = "rocksdb")]
"rocks" => Self::Rocks(db::rocksdb::new_conn(path)?), "rocks" => Self::Rocks(db::rocksdb::new_conn(path)?),
#[cfg(feature = "persy")]
"persy" => Self::Persy(db::persy::new_db(path)?),
_ => panic!("unknown database type: {}", name), _ => panic!("unknown database type: {}", name),
}) })
} }
@ -31,10 +41,16 @@ impl Deref for Database {
fn deref(&self) -> &Self::Target { fn deref(&self) -> &Self::Target {
match self { match self {
#[cfg(feature = "sled")]
Database::Sled(db) => db, Database::Sled(db) => db,
Database::Sqlite(db) => db, #[cfg(feature = "heed")]
Database::Heed(db) => db, Database::Heed(db) => db,
#[cfg(feature = "sqlite")]
Database::Sqlite(db) => db,
#[cfg(feature = "rocksdb")]
Database::Rocks(db) => db, Database::Rocks(db) => db,
#[cfg(feature = "persy")]
Database::Persy(db) => db,
} }
} }
} }
@ -42,15 +58,32 @@ impl Deref for Database {
impl DerefMut for Database { impl DerefMut for Database {
fn deref_mut(&mut self) -> &mut Self::Target { fn deref_mut(&mut self) -> &mut Self::Target {
match self { match self {
#[cfg(feature = "sled")]
Database::Sled(db) => db, Database::Sled(db) => db,
Database::Sqlite(db) => db, #[cfg(feature = "heed")]
Database::Heed(db) => db, Database::Heed(db) => db,
#[cfg(feature = "sqlite")]
Database::Sqlite(db) => db,
#[cfg(feature = "rocksdb")]
Database::Rocks(db) => db, Database::Rocks(db) => db,
#[cfg(feature = "persy")]
Database::Persy(db) => db,
} }
} }
} }
const DATABASES: &[&str] = &["heed", "sqlite", "sled", "rocks"]; const DATABASES: &[&str] = &[
#[cfg(feature = "sled")]
"sled",
#[cfg(feature = "heed")]
"heed",
#[cfg(feature = "sqlite")]
"sqlite",
#[cfg(feature = "rocksdb")]
"rocks",
#[cfg(feature = "persy")]
"persy",
];
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
let matches = App::new("Conduit Sled to Sqlite Migrator") let matches = App::new("Conduit Sled to Sqlite Migrator")