diff --git a/Cargo.lock b/Cargo.lock index 46ec3f7..83f25f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -45,12 +45,27 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bitflags" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +[[package]] +name = "bytemuck" +version = "1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72957246c41db82b8ef88a5486143830adeb8227ef9837740bdec67724cf2c5b" + [[package]] name = "byteorder" version = "1.4.3" @@ -66,6 +81,12 @@ dependencies = [ "jobserver", ] +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + [[package]] name = "cfg-if" version = "1.0.0" @@ -92,9 +113,21 @@ name = "conduit_iface" version = "0.1.0" dependencies = [ "anyhow", + "heed", "itertools 0.10.1", "rusqlite", "sled", + "thiserror", +] + +[[package]] +name = "conduit_migrate" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "conduit_iface", + "thiserror", ] [[package]] @@ -112,7 +145,7 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] [[package]] @@ -121,20 +154,39 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" dependencies = [ - "cfg-if", - "crossbeam-utils", + "cfg-if 1.0.0", + "crossbeam-utils 0.8.5", "lazy_static", "memoffset", "scopeguard", ] +[[package]] +name = "crossbeam-queue" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c979cd6cfe72335896575c6b5688da489e420d36a27a0b9eb0c73db574b4a4b" +dependencies = [ + "crossbeam-utils 0.6.6", +] + +[[package]] +name = "crossbeam-utils" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" +dependencies = [ + "cfg-if 0.1.10", + "lazy_static", +] + [[package]] name = "crossbeam-utils" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "lazy_static", ] @@ -156,6 +208,16 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +[[package]] +name = "form_urlencoded" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +dependencies = [ + "matches", + "percent-encoding", +] + [[package]] name = "fs2" version = "0.4.3" @@ -181,7 +243,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "libc", "wasi", ] @@ -210,6 +272,42 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "heed" +version = "0.10.6" +source = "git+https://github.com/timokoesters/heed.git?rev=f6f825da7fb2c758867e05ad973ef800a6fe1d5d#f6f825da7fb2c758867e05ad973ef800a6fe1d5d" +dependencies = [ + "bytemuck", + "byteorder", + "heed-traits", + "heed-types", + "libc", + "lmdb-rkv-sys", + "once_cell", + "page_size", + "serde", + "synchronoise", + "url", +] + +[[package]] +name = "heed-traits" +version = "0.7.0" +source = "git+https://github.com/timokoesters/heed.git?rev=f6f825da7fb2c758867e05ad973ef800a6fe1d5d#f6f825da7fb2c758867e05ad973ef800a6fe1d5d" + +[[package]] +name = "heed-types" +version = "0.7.2" +source = "git+https://github.com/timokoesters/heed.git?rev=f6f825da7fb2c758867e05ad973ef800a6fe1d5d#f6f825da7fb2c758867e05ad973ef800a6fe1d5d" +dependencies = [ + "bincode", + "bytemuck", + "byteorder", + "heed-traits", + "serde", + "serde_json", +] + [[package]] name = "hermit-abi" version = "0.1.19" @@ -219,13 +317,24 @@ dependencies = [ "libc", ] +[[package]] +name = "idna" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "instant" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] [[package]] @@ -246,6 +355,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" + [[package]] name = "jobserver" version = "0.1.22" @@ -278,6 +393,17 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "lmdb-rkv-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b27470ac25167b3afdfb6af8fcd3bc1be67de50ffbdaf4073378cfded6ae24a5" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "lock_api" version = "0.4.4" @@ -293,9 +419,15 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] +[[package]] +name = "matches" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" + [[package]] name = "memchr" version = "2.4.0" @@ -317,6 +449,16 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" +[[package]] +name = "page_size" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "parking_lot" version = "0.11.1" @@ -334,7 +476,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "instant", "libc", "redox_syscall", @@ -342,12 +484,36 @@ dependencies = [ "winapi", ] +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + [[package]] name = "pkg-config" version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" +[[package]] +name = "proc-macro2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2", +] + [[package]] name = "redox_syscall" version = "0.2.9" @@ -372,12 +538,49 @@ dependencies = [ "smallvec", ] +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + [[package]] name = "scopeguard" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "serde" +version = "1.0.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "336b10da19a12ad094b59d870ebde26a45402e5b470add4b5fd03c5048a32127" +dependencies = [ + "itoa", + "ryu", + "serde", +] + [[package]] name = "sled" version = "0.34.6" @@ -386,7 +589,7 @@ checksum = "1d0132f3e393bcb7390c60bb45769498cf4550bcb7a21d7f95c02b69f6362cdc" dependencies = [ "crc32fast", "crossbeam-epoch", - "crossbeam-utils", + "crossbeam-utils 0.8.5", "fs2", "fxhash", "libc", @@ -407,6 +610,26 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" +[[package]] +name = "syn" +version = "1.0.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "synchronoise" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d717ed0efc9d39ab3b642a096bc369a3e02a38a51c41845d7fe31bdad1d6eaeb" +dependencies = [ + "crossbeam-queue", +] + [[package]] name = "textwrap" version = "0.11.0" @@ -416,12 +639,83 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "thiserror" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinyvec" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "848a1e1181b9f6753b5e96a092749e29b11d19ede67dfbbd6c7dc7e0f49b5338" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" + +[[package]] +name = "unicode-bidi" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eeb8be209bb1c96b7c177c7420d26e04eccacb0eeae6b980e35fcb74678107e0" +dependencies = [ + "matches", +] + +[[package]] +name = "unicode-normalization" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-width" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "url" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" +dependencies = [ + "form_urlencoded", + "idna", + "matches", + "percent-encoding", +] + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/tools/iface/Cargo.toml b/tools/iface/Cargo.toml index a732700..0fc674d 100644 --- a/tools/iface/Cargo.toml +++ b/tools/iface/Cargo.toml @@ -10,3 +10,5 @@ itertools = "0.10.1" sled = { version = "0.34.6", features = ["compression", "no_metrics"] } rusqlite = { version = "0.25.3", features = ["bundled"] } anyhow = "1.0.42" +heed = { git = "https://github.com/timokoesters/heed.git", rev = "f6f825da7fb2c758867e05ad973ef800a6fe1d5d" } +thiserror = "1.0.26" \ No newline at end of file diff --git a/tools/iface/src/db.rs b/tools/iface/src/db.rs index 56d5db7..755925a 100644 --- a/tools/iface/src/db.rs +++ b/tools/iface/src/db.rs @@ -1,3 +1,4 @@ +pub mod heed; pub mod sled; pub mod sqlite; @@ -8,7 +9,7 @@ pub type KVIter<'a> = Box, Vec)> + 'a>; pub type TreeKVIter<'a> = Box, KVIter<'a>)> + 'a>; pub trait Database { - fn iter<'a>(&'a self) -> TreeKVIter<'a>; + fn names<'a>(&'a self) -> Vec>; fn segment<'a>(&'a mut self, name: Vec) -> Option>; // change return type to Result } @@ -18,25 +19,42 @@ pub trait Segment { &'a mut self, batch: Box, Vec)> + 'a>, ) -> anyhow::Result<()>; + + fn get_iter<'a>(&'a mut self) -> Box; +} + +pub trait SegmentIter { + fn iter<'a>(&'a mut self) -> KVIter<'a>; } pub fn copy_database( - src: &impl Database, - dst: &mut impl Database, + src: &mut dyn Database, + dst: &mut dyn Database, chunk_size: usize, ) -> anyhow::Result<()> { - for (tree, i) in src.iter() { - dbg!(&tree); + // todo remove unwraps + for seg_name in src.names() { + drop(dbg!(String::from_utf8(seg_name.clone()))); - let mut t = dst.segment(tree).unwrap(); // todo remove unwrap + let mut src_seg = src.segment(seg_name.clone()).unwrap(); + + let mut dst_seg = dst.segment(seg_name).unwrap(); + + let mut src_seg_iter = src_seg.get_iter(); + + let i = src_seg_iter.iter(); let mut x: usize = 0; for chunk in &i.chunks(chunk_size) { dbg!(&x); - t.batch_insert(Box::new(chunk))?; + dst_seg.batch_insert(Box::new(chunk))?; x += chunk_size; } + + drop(dst_seg); + + drop(src_seg_iter); } Ok(()) diff --git a/tools/iface/src/db/heed.rs b/tools/iface/src/db/heed.rs new file mode 100644 index 0000000..27b3188 --- /dev/null +++ b/tools/iface/src/db/heed.rs @@ -0,0 +1,106 @@ +use super::{Database, KVIter, Segment, SegmentIter}; +use heed::UntypedDatabase; +use itertools::Itertools; +use std::path::Path; +use thiserror::Error; + +#[derive(Error, Debug)] +#[error("There was a problem with the connection to the heed database: {0}")] +pub struct HeedError(String); + +impl From for HeedError { + fn from(err: heed::Error) -> Self { + Self(err.to_string()) + } +} + +pub fn new_db>(path: P) -> Result { + let mut env_builder = heed::EnvOpenOptions::new(); + env_builder.map_size(1024 * 1024 * 1024); // 1 Terabyte + env_builder.max_readers(126); + env_builder.max_dbs(128); + + Ok(env_builder.open(path)?) +} + +pub struct HeedDB(heed::Env); + +impl HeedDB { + pub fn new(env: heed::Env) -> Self { + Self(env) + } +} + +impl Database for HeedDB { + fn segment<'a>(&'a mut self, name: Vec) -> Option> { + let name = String::from_utf8(name).ok()?; + + let db: UntypedDatabase = self.0.create_database(Some(name.as_str())).ok()?; + + Some(Box::new(HeedSegment { + env: self.0.clone(), + db, + })) + } + + fn names<'a>(&'a self) -> Vec> { + let db: UntypedDatabase = self.0.open_database(None).unwrap().unwrap(); + + let txn = self.0.read_txn().unwrap(); + + db.iter(&txn) + .unwrap() + .filter_map(|r| -> Option<(Vec, UntypedDatabase)> { + let (k, _) = r.ok()?; + + let name = String::from_utf8(k.to_vec()).ok()?; + + if let Some(db) = (self.0.open_database(Some(name.as_str()))).ok().flatten() { + Some((k.to_vec(), db)) + } else { + None + } + }) + .map(|(k, _)| k) + .collect_vec() + } +} +pub struct HeedSegment { + env: heed::Env, + db: heed::UntypedDatabase, +} + +impl Segment for HeedSegment { + fn batch_insert<'a>( + &'a mut self, + batch: Box, Vec)> + 'a>, + ) -> anyhow::Result<()> { + let mut txn = self.env.write_txn().unwrap(); + + for (k, v) in batch { + self.db.put(&mut txn, &k.as_slice(), &v.as_slice()).unwrap(); + } + + txn.commit().unwrap(); + + Ok(()) + } + + fn get_iter<'a>(&'a mut self) -> Box { + todo!() + } +} + +struct HeedSegmentIter<'a>(heed::RoTxn<'a>, &'a heed::UntypedDatabase); + +impl SegmentIter for HeedSegmentIter<'_> { + fn iter<'a>(&'a mut self) -> KVIter<'a> { + Box::new(self.1.iter(&self.0).unwrap().filter_map(|r| { + if let Ok(t) = r { + Some((t.0.to_vec(), t.1.to_vec())) + } else { + None + } + })) + } +} diff --git a/tools/iface/src/db/sled.rs b/tools/iface/src/db/sled.rs index 64fe44f..bf21215 100644 --- a/tools/iface/src/db/sled.rs +++ b/tools/iface/src/db/sled.rs @@ -1,8 +1,7 @@ -use std::path::Path; - +use super::{Database, KVIter, Segment, SegmentIter}; +use itertools::Itertools; use sled::{Batch, Config, Db, Result, Tree}; - -use super::{Database, KVIter, Segment, TreeKVIter}; +use std::path::Path; pub fn new_db>(path: P) -> Result { Config::default().path(path).use_compression(true).open() @@ -17,31 +16,12 @@ impl SledDB { } impl Database for SledDB { - fn iter<'a>(&'a self) -> TreeKVIter<'a> { - Box::new( - self.0 - .tree_names() - .into_iter() - .map(|v| v.to_vec()) - .filter_map(move |v| { - if let Ok(t) = self.0.open_tree(&v) { - Some((v, t)) - } else { - None - } - }) - .map(|(v, t): (Vec, Tree)| -> (Vec, KVIter<'a>) { - let i = t.into_iter().filter_map(|r| { - if let Ok(t) = r { - Some((t.0.to_vec(), t.1.to_vec())) - } else { - None - } - }); - - (v, Box::new(i)) - }), - ) + fn names<'a>(&'a self) -> Vec> { + self.0 + .tree_names() + .into_iter() + .map(|v| v.to_vec()) + .collect_vec() } fn segment(&mut self, name: Vec) -> Option> { @@ -65,4 +45,22 @@ impl Segment for Tree { self.apply_batch(sled_batch).map_err(Into::into) } + + fn get_iter<'a>(&'a mut self) -> Box { + Box::new(SledTreeIter(self)) + } +} + +struct SledTreeIter<'a>(&'a mut Tree); + +impl SegmentIter for SledTreeIter<'_> { + fn iter<'a>(&'a mut self) -> KVIter<'a> { + Box::new(self.0.iter().filter_map(|r| { + if let Ok(t) = r { + Some((t.0.to_vec(), t.1.to_vec())) + } else { + None + } + })) + } } diff --git a/tools/iface/src/db/sqlite.rs b/tools/iface/src/db/sqlite.rs index 96e7463..ec6c03e 100644 --- a/tools/iface/src/db/sqlite.rs +++ b/tools/iface/src/db/sqlite.rs @@ -1,8 +1,8 @@ -use std::path::Path; +use itertools::Itertools; +use rusqlite::{self, Connection, DatabaseName::Main, Statement}; +use std::{collections::HashSet, iter::FromIterator, path::Path}; -use rusqlite::{self, Connection, DatabaseName::Main}; - -use super::{Database, Segment}; +use super::{Database, KVIter, Segment, SegmentIter}; pub fn new_conn>(path: P) -> rusqlite::Result { let path = path.as_ref().join("conduit.db"); @@ -13,37 +13,71 @@ pub fn new_conn>(path: P) -> rusqlite::Result { Ok(conn) } -pub struct SqliteDB(Connection); +pub struct SqliteDB { + conn: Connection, +} -impl SqliteDB { +const CORRECT_TABLE_SET: &[&str] = &["key", "value"]; + +impl<'a> SqliteDB { pub fn new(conn: Connection) -> Self { - Self(conn) + Self { conn } + } + + fn valid_tables(&self) -> Vec { + self.conn + .prepare("SELECT name FROM sqlite_master WHERE type='table'") + .unwrap() + .query_map([], |row| row.get(0)) + .unwrap() + .map(|r| r.unwrap()) + .filter(|a| self.test_table(a)) + .collect() + } + + fn test_table(&self, table: &String) -> bool { + let set: HashSet = self + .conn + .prepare("SELECT name FROM pragma_table_info(?)") + .unwrap() + .query_map([table], |row| row.get(0)) + .unwrap() + .map(|r| r.unwrap()) + .collect(); + + set == HashSet::from_iter(CORRECT_TABLE_SET.iter().map(|s| s.to_string())) } } impl Database for SqliteDB { - fn iter<'a>(&'a self) -> super::TreeKVIter<'a> { - todo!("iterate over tables, pick only tables that have columns 'key' and 'value', then iterate over that with values") + fn names<'a>(&'a self) -> Vec> { + self.valid_tables().into_iter().map_into().collect_vec() } fn segment<'a>(&'a mut self, name: Vec) -> Option> { let string = String::from_utf8(name).unwrap(); // taken from src/database/abstraction/sqlite.rs - self.0.execute(format!("CREATE TABLE IF NOT EXISTS {} ( \"key\" BLOB PRIMARY KEY, \"value\" BLOB NOT NULL )", &string).as_str(), []).unwrap(); + self.conn.execute(format!("CREATE TABLE IF NOT EXISTS {} ( \"key\" BLOB PRIMARY KEY, \"value\" BLOB NOT NULL )", &string).as_str(), []).unwrap(); - Some(Box::new(SqliteTable(&mut self.0, string))) + Some(Box::new(SqliteSegment { + conn: &mut self.conn, + name: string, + })) } } -pub struct SqliteTable<'a>(&'a mut Connection, String); +pub struct SqliteSegment<'a> { + conn: &'a mut Connection, + name: String, +} -impl Segment for SqliteTable<'_> { +impl Segment for SqliteSegment<'_> { fn batch_insert( &mut self, batch: Box, Vec)> + '_>, ) -> anyhow::Result<()> { - let tx = self.0.transaction()?; - let sql_s = format!("INSERT INTO {} (key, value) VALUES (?, ?)", &self.1); + let tx = self.conn.transaction()?; + let sql_s = format!("INSERT INTO {} (key, value) VALUES (?, ?)", &self.name); let sql = sql_s.as_str(); for (k, v) in batch { @@ -52,4 +86,25 @@ impl Segment for SqliteTable<'_> { tx.commit().map_err(Into::into) } + + fn get_iter(&mut self) -> Box { + Box::new(SqliteSegmentIter( + self.conn + .prepare(format!("SELECT key, value FROM {}", self.name).as_str()) + .unwrap(), + )) + } +} + +struct SqliteSegmentIter<'a>(Statement<'a>); + +impl SegmentIter for SqliteSegmentIter<'_> { + fn iter<'f>(&'f mut self) -> KVIter<'f> { + Box::new( + self.0 + .query_map([], |row| Ok((row.get_unwrap(0), row.get_unwrap(1)))) + .unwrap() + .map(|r| r.unwrap()), + ) + } } diff --git a/tools/migrate/Cargo.toml b/tools/migrate/Cargo.toml new file mode 100644 index 0000000..70086b6 --- /dev/null +++ b/tools/migrate/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "conduit_migrate" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = "2.33.3" +anyhow = "1.0.41" +conduit_iface = { path = "../iface/" } +thiserror = "1.0.26" diff --git a/tools/migrate/src/main.rs b/tools/migrate/src/main.rs new file mode 100644 index 0000000..be3298f --- /dev/null +++ b/tools/migrate/src/main.rs @@ -0,0 +1,126 @@ +use clap::{App, Arg}; +use conduit_iface::db::{self, copy_database, heed::HeedDB, sled::SledDB, sqlite::SqliteDB}; +use std::{ + ops::{Deref, DerefMut}, + path::{Path, PathBuf}, +}; + +enum Database { + Sled(SledDB), + Sqlite(SqliteDB), + Heed(HeedDB), +} + +impl Database { + fn new(name: &str, path: PathBuf) -> anyhow::Result { + Ok(match name { + "sled" => Self::Sled(SledDB::new(db::sled::new_db(path)?)), + "heed" => Self::Heed(HeedDB::new(db::heed::new_db(path)?)), + "sqlite" => Self::Sqlite(SqliteDB::new(db::sqlite::new_conn(path)?)), + _ => panic!("unknown database type: {}", name), + }) + } +} + +impl Deref for Database { + type Target = dyn db::Database; + + fn deref(&self) -> &Self::Target { + match self { + Database::Sled(db) => db, + Database::Sqlite(db) => db, + Database::Heed(db) => db, + } + } +} + +impl DerefMut for Database { + fn deref_mut(&mut self) -> &mut Self::Target { + match self { + Database::Sled(db) => db, + Database::Sqlite(db) => db, + Database::Heed(db) => db, + } + } +} + +const DATABASES: &[&str] = &["heed", "sqlite", "sled"]; + +fn main() -> anyhow::Result<()> { + let matches = App::new("Conduit Sled to Sqlite Migrator") + .arg( + Arg::with_name("from_dir") + .short("s") + .long("from-dir") + .takes_value(true) + .long_help("Sets the directory to grab the database from\nWill default to \".\""), + ) + .arg( + Arg::with_name("to_dir") + .short("d") + .long("to-dir") + .takes_value(true) + .long_help("Sets the destination directory\nWill default to from_dir"), + ) + .arg( + Arg::with_name("from") + .short("f") + .long("from") + .long_help( + format!( + "The type of database to convert from\nExample: {}", + DATABASES.join(", ") + ) + .as_str(), + ) + .takes_value(true) + .required(true), + ) + .arg( + Arg::with_name("to") + .short("t") + .long("to") + .long_help( + format!( + "The type of database to convert to\nExample: {}", + DATABASES.join(", ") + ) + .as_str(), + ) + .takes_value(true) + .required(true), + ) + .get_matches(); + + let src_dir = matches.value_of("from_dir").unwrap_or("."); + + let dst_dir = matches.value_of("to_dir"); + + let src_dir = Path::new(src_dir).canonicalize()?; + + if !src_dir.is_dir() { + return Err(anyhow::anyhow!("source path must be directory")); + } + + let dst_dir = match dst_dir { + None => Ok(src_dir.clone()), + Some(dir) => { + let p = Path::new(dir).canonicalize()?; + if !p.is_dir() { + Err(anyhow::anyhow!("destination path must be directory")) + } else { + Ok(p) + } + } + }?; + + dbg!(&src_dir, &dst_dir); + + let mut src_db = Database::new(matches.value_of("from").unwrap(), src_dir)?; + + let mut dst_db = Database::new(matches.value_of("to").unwrap(), dst_dir)?; + + copy_database(&mut *src_db, &mut *dst_db, 1000)?; + + Ok(()) +} diff --git a/tools/sled_to_sqlite/src/main.rs b/tools/sled_to_sqlite/src/main.rs index d706580..803c4c6 100644 --- a/tools/sled_to_sqlite/src/main.rs +++ b/tools/sled_to_sqlite/src/main.rs @@ -42,11 +42,11 @@ fn main() -> anyhow::Result<()> { dbg!(&source_dir, &dest_dir); - let sled = sled::SledDB::new(sled::new_db(source_dir)?); + let mut sled = sled::SledDB::new(sled::new_db(source_dir)?); let mut sqlite = sqlite::SqliteDB::new(sqlite::new_conn(dest_dir)?); - copy_database(&sled, &mut sqlite, 1000)?; + copy_database(&mut sled, &mut sqlite, 1000)?; Ok(()) }