add rocksdb support

This commit is contained in:
Jonathan de Jong 2021-11-27 14:32:00 +01:00
parent a04057ed4c
commit e20e99493e
5 changed files with 249 additions and 3 deletions

128
Cargo.lock generated
View file

@ -54,6 +54,25 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "bindgen"
version = "0.59.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8"
dependencies = [
"bitflags",
"cexpr",
"clang-sys",
"lazy_static",
"lazycell",
"peeking_take_while",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
]
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "1.2.1" version = "1.2.1"
@ -81,6 +100,15 @@ dependencies = [
"jobserver", "jobserver",
] ]
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "0.1.10" version = "0.1.10"
@ -93,6 +121,17 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clang-sys"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa66045b9cb23c2e9c1520732030608b02ee07e5cfaa5a521ec15ded7fa24c90"
dependencies = [
"glob",
"libc",
"libloading",
]
[[package]] [[package]]
name = "clap" name = "clap"
version = "2.33.3" version = "2.33.3"
@ -115,6 +154,7 @@ dependencies = [
"anyhow", "anyhow",
"heed", "heed",
"itertools 0.10.1", "itertools 0.10.1",
"rocksdb",
"rusqlite", "rusqlite",
"sled", "sled",
"thiserror", "thiserror",
@ -376,12 +416,40 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.98" version = "0.2.98"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790" checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790"
[[package]]
name = "libloading"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afe203d669ec979b7128619bae5a63b7b42e9203c1b29146079ee05e2f604b52"
dependencies = [
"cfg-if 1.0.0",
"winapi",
]
[[package]]
name = "librocksdb-sys"
version = "6.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c309a9d2470844aceb9a4a098cf5286154d20596868b75a6b36357d2bb9ca25d"
dependencies = [
"bindgen",
"cc",
"glob",
"libc",
]
[[package]] [[package]]
name = "libsqlite3-sys" name = "libsqlite3-sys"
version = "0.22.2" version = "0.22.2"
@ -443,6 +511,23 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "nom"
version = "7.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109"
dependencies = [
"memchr",
"minimal-lexical",
"version_check",
]
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.8.0" version = "1.8.0"
@ -484,6 +569,12 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "peeking_take_while"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]] [[package]]
name = "percent-encoding" name = "percent-encoding"
version = "2.1.0" version = "2.1.0"
@ -523,6 +614,31 @@ dependencies = [
"bitflags", "bitflags",
] ]
[[package]]
name = "regex"
version = "1.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "rocksdb"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c749134fda8bfc90d0de643d59bfc841dcb3ac8a1062e12b6754bd60235c48b3"
dependencies = [
"libc",
"librocksdb-sys",
]
[[package]] [[package]]
name = "rusqlite" name = "rusqlite"
version = "0.25.3" version = "0.25.3"
@ -538,6 +654,12 @@ dependencies = [
"smallvec", "smallvec",
] ]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.5" version = "1.0.5"
@ -581,6 +703,12 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "shlex"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
[[package]] [[package]]
name = "sled" name = "sled"
version = "0.34.6" version = "0.34.6"

View file

@ -12,3 +12,4 @@ rusqlite = { version = "0.25.3", features = ["bundled"] }
anyhow = "1.0.42" anyhow = "1.0.42"
heed = { git = "https://github.com/timokoesters/heed.git", rev = "f6f825da7fb2c758867e05ad973ef800a6fe1d5d" } heed = { git = "https://github.com/timokoesters/heed.git", rev = "f6f825da7fb2c758867e05ad973ef800a6fe1d5d" }
thiserror = "1.0.26" thiserror = "1.0.26"
rocksdb = { version = "0.16.0", features = ["multi-threaded-cf"] }

View file

@ -1,4 +1,5 @@
pub mod heed; pub mod heed;
pub mod rocksdb;
pub mod sled; pub mod sled;
pub mod sqlite; pub mod sqlite;

View file

@ -0,0 +1,110 @@
use std::path::Path;
use super::{Database, Segment};
use rocksdb::{DBWithThreadMode, MultiThreaded};
/// Opens the RocksDB database located at `path`, creating it when it does
/// not exist yet, together with every column family already present on disk.
///
/// The column-family names discovered on disk are stored in the returned
/// [`RocksDB`] alongside the database handle.
///
/// # Errors
///
/// Returns the `rocksdb::Error` produced when opening the database fails.
/// Failing to *list* the column families is not treated as an error: the
/// list simply falls back to being empty.
pub fn new_conn<P: AsRef<Path>>(path: P) -> Result<RocksDB, rocksdb::Error> {
    let path = path.as_ref();

    // Table format settings: 512 KiB blocks.
    let mut table_opts = rocksdb::BlockBasedOptions::default();
    table_opts.set_block_size(512 << 10);

    // Database-wide settings.
    let mut opts = rocksdb::Options::default();
    opts.create_if_missing(true);
    opts.set_max_open_files(16);
    opts.set_compaction_style(rocksdb::DBCompactionStyle::Level);
    opts.set_compression_type(rocksdb::DBCompressionType::Snappy);
    opts.set_target_file_size_base(256 << 20);
    opts.set_write_buffer_size(256 << 20);
    opts.set_block_based_table_factory(&table_opts);

    // Listing fails for a database that does not exist yet; start from an
    // empty set of column families in that case.
    let existing = DBWithThreadMode::<MultiThreaded>::list_cf(&opts, path).unwrap_or_default();

    // Every known column family is opened with a fixed prefix extractor of length 1.
    let mut descriptors = Vec::with_capacity(existing.len());
    for cf_name in &existing {
        let mut cf_opts = rocksdb::Options::default();
        cf_opts.set_prefix_extractor(rocksdb::SliceTransform::create_fixed_prefix(1));
        descriptors.push(rocksdb::ColumnFamilyDescriptor::new(cf_name, cf_opts));
    }

    let rocks = DBWithThreadMode::<MultiThreaded>::open_cf_descriptors(&opts, path, descriptors)?;

    Ok(RocksDB {
        rocks,
        old_cfs: existing,
    })
}
/// A RocksDB database opened in multi-threaded column-family mode, bundled
/// with the column-family names recorded when it was opened.
pub struct RocksDB {
/// The underlying database handle.
rocks: DBWithThreadMode<MultiThreaded>,
/// Names of the column families that `new_conn` listed on disk at open time.
old_cfs: Vec<String>,
}
impl Database for RocksDB {
    /// Returns the segment backed by the column family called `name`,
    /// creating that column family first when the database does not have it.
    ///
    /// Returns `None` (after reporting the reason on stderr) when `name` is
    /// not valid UTF-8 — column-family names are strings — or when the
    /// column family could not be created.
    fn segment<'a>(&'a mut self, name: Vec<u8>) -> Option<Box<dyn Segment + 'a>> {
        let name = match String::from_utf8(name) {
            Ok(name) => name,
            Err(error) => {
                eprintln!("segment name is not valid UTF-8: {}", error);
                return None;
            }
        };

        // Ask the live database instead of the open-time snapshot in
        // `old_cfs`, so a column family created by an earlier call is
        // recognised and not created a second time.
        if self.rocks.cf_handle(&name).is_none() {
            let mut options = rocksdb::Options::default();
            options.set_prefix_extractor(rocksdb::SliceTransform::create_fixed_prefix(1));

            // Surface the failure here; otherwise it would only show up later
            // as a panic when the segment looks up its missing handle.
            if let Err(error) = self.rocks.create_cf(&name, &options) {
                eprintln!("failed to create cf {:?}: {}", name, error);
                return None;
            }
            println!("created cf");
        }

        Some(Box::new(RocksDBCF { db: self, name }))
    }

    /// Returns the names of the column families that existed when the
    /// database was opened, each as raw bytes.
    fn names<'a>(&'a self) -> Vec<Vec<u8>> {
        self.old_cfs.iter().map(|v| v.as_bytes().to_vec()).collect()
    }
}
/// A single column family of a [`RocksDB`], addressed by name.
pub struct RocksDBCF<'a> {
/// The database that owns the column family.
db: &'a mut RocksDB,
/// Name of the column family inside `db`.
name: String,
}
impl RocksDBCF<'_> {
    /// Looks up the handle of this column family in the owning database.
    ///
    /// # Panics
    ///
    /// Panics when the database has no column family named `self.name`.
    fn cf(&self) -> rocksdb::BoundColumnFamily<'_> {
        let rocks = &self.db.rocks;
        let handle = rocks.cf_handle(&self.name);
        handle.unwrap()
    }
}
impl<'r> Segment for RocksDBCF<'r> {
    /// Writes every `(key, value)` pair yielded by `batch` into this column
    /// family, one `put_cf` call per pair.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error reported by a write; pairs
    /// written before the failure are not undone here.
    fn batch_insert<'a>(
        &'a mut self,
        mut batch: Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + 'a>,
    ) -> anyhow::Result<()> {
        // Resolve the handle once; it is reused for every write below.
        let handle = self.cf();
        let rocks = &self.db.rocks;

        batch.try_for_each(|(key, value)| rocks.put_cf(handle, key, value))?;
        Ok(())
    }

    /// Returns an iterator factory over the contents of this column family.
    fn get_iter(&mut self) -> Box<dyn super::SegmentIter + '_> {
        let cursor = RocksDBCFIter(self);
        Box::new(cursor)
    }
}
/// Borrows a [`RocksDBCF`] so that its key/value pairs can be iterated.
pub struct RocksDBCFIter<'a>(&'a RocksDBCF<'a>);
impl super::SegmentIter for RocksDBCFIter<'_> {
    /// Iterates over the borrowed column family from its first entry,
    /// yielding each key and value as an owned `Vec<u8>`.
    fn iter<'a>(&'a mut self) -> super::KVIter<'a> {
        let segment = self.0;
        let handle = segment.cf();
        let entries = segment
            .db
            .rocks
            .iterator_cf(handle, rocksdb::IteratorMode::Start);

        // Copy each pair out of the iterator into owned vectors.
        let owned = entries.map(|(key, value)| (Vec::from(key), Vec::from(value)));
        Box::new(owned)
    }
}

View file

@ -1,5 +1,7 @@
use clap::{App, Arg}; use clap::{App, Arg};
use conduit_iface::db::{self, copy_database, heed::HeedDB, sled::SledDB, sqlite::SqliteDB}; use conduit_iface::db::{
self, copy_database, heed::HeedDB, rocksdb::RocksDB, sled::SledDB, sqlite::SqliteDB,
};
use std::{ use std::{
ops::{Deref, DerefMut}, ops::{Deref, DerefMut},
path::{Path, PathBuf}, path::{Path, PathBuf},
@ -9,6 +11,7 @@ enum Database {
Sled(SledDB), Sled(SledDB),
Sqlite(SqliteDB), Sqlite(SqliteDB),
Heed(HeedDB), Heed(HeedDB),
Rocks(RocksDB),
} }
impl Database { impl Database {
@ -17,6 +20,7 @@ impl Database {
"sled" => Self::Sled(SledDB::new(db::sled::new_db(path)?)), "sled" => Self::Sled(SledDB::new(db::sled::new_db(path)?)),
"heed" => Self::Heed(HeedDB::new(db::heed::new_db(path)?)), "heed" => Self::Heed(HeedDB::new(db::heed::new_db(path)?)),
"sqlite" => Self::Sqlite(SqliteDB::new(db::sqlite::new_conn(path)?)), "sqlite" => Self::Sqlite(SqliteDB::new(db::sqlite::new_conn(path)?)),
"rocks" => Self::Rocks(db::rocksdb::new_conn(path)?),
_ => panic!("unknown database type: {}", name), _ => panic!("unknown database type: {}", name),
}) })
} }
@ -30,6 +34,7 @@ impl Deref for Database {
Database::Sled(db) => db, Database::Sled(db) => db,
Database::Sqlite(db) => db, Database::Sqlite(db) => db,
Database::Heed(db) => db, Database::Heed(db) => db,
Database::Rocks(db) => db,
} }
} }
} }
@ -40,11 +45,12 @@ impl DerefMut for Database {
Database::Sled(db) => db, Database::Sled(db) => db,
Database::Sqlite(db) => db, Database::Sqlite(db) => db,
Database::Heed(db) => db, Database::Heed(db) => db,
Database::Rocks(db) => db,
} }
} }
} }
const DATABASES: &[&str] = &["heed", "sqlite", "sled"]; const DATABASES: &[&str] = &["heed", "sqlite", "sled", "rocks"];
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
let matches = App::new("Conduit Sled to Sqlite Migrator") let matches = App::new("Conduit Sled to Sqlite Migrator")