refactor, add heed, add migrate tool

This commit is contained in:
Jonathan de Jong 2021-07-30 20:58:22 +02:00
parent 03305cd144
commit df8d3c95de
9 changed files with 673 additions and 62 deletions

312
Cargo.lock generated
View file

@ -45,12 +45,27 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "bincode"
version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "1.2.1" version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "bytemuck"
version = "1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72957246c41db82b8ef88a5486143830adeb8227ef9837740bdec67724cf2c5b"
[[package]] [[package]]
name = "byteorder" name = "byteorder"
version = "1.4.3" version = "1.4.3"
@ -66,6 +81,12 @@ dependencies = [
"jobserver", "jobserver",
] ]
[[package]]
name = "cfg-if"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.0" version = "1.0.0"
@ -92,9 +113,21 @@ name = "conduit_iface"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"heed",
"itertools 0.10.1", "itertools 0.10.1",
"rusqlite", "rusqlite",
"sled", "sled",
"thiserror",
]
[[package]]
name = "conduit_migrate"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"conduit_iface",
"thiserror",
] ]
[[package]] [[package]]
@ -112,7 +145,7 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 1.0.0",
] ]
[[package]] [[package]]
@ -121,20 +154,39 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 1.0.0",
"crossbeam-utils", "crossbeam-utils 0.8.5",
"lazy_static", "lazy_static",
"memoffset", "memoffset",
"scopeguard", "scopeguard",
] ]
[[package]]
name = "crossbeam-queue"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c979cd6cfe72335896575c6b5688da489e420d36a27a0b9eb0c73db574b4a4b"
dependencies = [
"crossbeam-utils 0.6.6",
]
[[package]]
name = "crossbeam-utils"
version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6"
dependencies = [
"cfg-if 0.1.10",
"lazy_static",
]
[[package]] [[package]]
name = "crossbeam-utils" name = "crossbeam-utils"
version = "0.8.5" version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 1.0.0",
"lazy_static", "lazy_static",
] ]
@ -156,6 +208,16 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "form_urlencoded"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191"
dependencies = [
"matches",
"percent-encoding",
]
[[package]] [[package]]
name = "fs2" name = "fs2"
version = "0.4.3" version = "0.4.3"
@ -181,7 +243,7 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 1.0.0",
"libc", "libc",
"wasi", "wasi",
] ]
@ -210,6 +272,42 @@ dependencies = [
"hashbrown", "hashbrown",
] ]
[[package]]
name = "heed"
version = "0.10.6"
source = "git+https://github.com/timokoesters/heed.git?rev=f6f825da7fb2c758867e05ad973ef800a6fe1d5d#f6f825da7fb2c758867e05ad973ef800a6fe1d5d"
dependencies = [
"bytemuck",
"byteorder",
"heed-traits",
"heed-types",
"libc",
"lmdb-rkv-sys",
"once_cell",
"page_size",
"serde",
"synchronoise",
"url",
]
[[package]]
name = "heed-traits"
version = "0.7.0"
source = "git+https://github.com/timokoesters/heed.git?rev=f6f825da7fb2c758867e05ad973ef800a6fe1d5d#f6f825da7fb2c758867e05ad973ef800a6fe1d5d"
[[package]]
name = "heed-types"
version = "0.7.2"
source = "git+https://github.com/timokoesters/heed.git?rev=f6f825da7fb2c758867e05ad973ef800a6fe1d5d#f6f825da7fb2c758867e05ad973ef800a6fe1d5d"
dependencies = [
"bincode",
"bytemuck",
"byteorder",
"heed-traits",
"serde",
"serde_json",
]
[[package]] [[package]]
name = "hermit-abi" name = "hermit-abi"
version = "0.1.19" version = "0.1.19"
@ -219,13 +317,24 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "idna"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8"
dependencies = [
"matches",
"unicode-bidi",
"unicode-normalization",
]
[[package]] [[package]]
name = "instant" name = "instant"
version = "0.1.9" version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 1.0.0",
] ]
[[package]] [[package]]
@ -246,6 +355,12 @@ dependencies = [
"either", "either",
] ]
[[package]]
name = "itoa"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
[[package]] [[package]]
name = "jobserver" name = "jobserver"
version = "0.1.22" version = "0.1.22"
@ -278,6 +393,17 @@ dependencies = [
"vcpkg", "vcpkg",
] ]
[[package]]
name = "lmdb-rkv-sys"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b27470ac25167b3afdfb6af8fcd3bc1be67de50ffbdaf4073378cfded6ae24a5"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]] [[package]]
name = "lock_api" name = "lock_api"
version = "0.4.4" version = "0.4.4"
@ -293,9 +419,15 @@ version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 1.0.0",
] ]
[[package]]
name = "matches"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08"
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.4.0" version = "2.4.0"
@ -317,6 +449,16 @@ version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56"
[[package]]
name = "page_size"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd"
dependencies = [
"libc",
"winapi",
]
[[package]] [[package]]
name = "parking_lot" name = "parking_lot"
version = "0.11.1" version = "0.11.1"
@ -334,7 +476,7 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018"
dependencies = [ dependencies = [
"cfg-if", "cfg-if 1.0.0",
"instant", "instant",
"libc", "libc",
"redox_syscall", "redox_syscall",
@ -342,12 +484,36 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "percent-encoding"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
[[package]] [[package]]
name = "pkg-config" name = "pkg-config"
version = "0.3.19" version = "0.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c"
[[package]]
name = "proc-macro2"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
dependencies = [
"proc-macro2",
]
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.2.9" version = "0.2.9"
@ -372,12 +538,49 @@ dependencies = [
"smallvec", "smallvec",
] ]
[[package]]
name = "ryu"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
[[package]] [[package]]
name = "scopeguard" name = "scopeguard"
version = "1.1.0" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "serde"
version = "1.0.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b10da19a12ad094b59d870ebde26a45402e5b470add4b5fd03c5048a32127"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]] [[package]]
name = "sled" name = "sled"
version = "0.34.6" version = "0.34.6"
@ -386,7 +589,7 @@ checksum = "1d0132f3e393bcb7390c60bb45769498cf4550bcb7a21d7f95c02b69f6362cdc"
dependencies = [ dependencies = [
"crc32fast", "crc32fast",
"crossbeam-epoch", "crossbeam-epoch",
"crossbeam-utils", "crossbeam-utils 0.8.5",
"fs2", "fs2",
"fxhash", "fxhash",
"libc", "libc",
@ -407,6 +610,26 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "syn"
version = "1.0.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "synchronoise"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d717ed0efc9d39ab3b642a096bc369a3e02a38a51c41845d7fe31bdad1d6eaeb"
dependencies = [
"crossbeam-queue",
]
[[package]] [[package]]
name = "textwrap" name = "textwrap"
version = "0.11.0" version = "0.11.0"
@ -416,12 +639,83 @@ dependencies = [
"unicode-width", "unicode-width",
] ]
[[package]]
name = "thiserror"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tinyvec"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "848a1e1181b9f6753b5e96a092749e29b11d19ede67dfbbd6c7dc7e0f49b5338"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "unicode-bidi"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eeb8be209bb1c96b7c177c7420d26e04eccacb0eeae6b980e35fcb74678107e0"
dependencies = [
"matches",
]
[[package]]
name = "unicode-normalization"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9"
dependencies = [
"tinyvec",
]
[[package]] [[package]]
name = "unicode-width" name = "unicode-width"
version = "0.1.8" version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "url"
version = "2.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c"
dependencies = [
"form_urlencoded",
"idna",
"matches",
"percent-encoding",
]
[[package]] [[package]]
name = "vcpkg" name = "vcpkg"
version = "0.2.15" version = "0.2.15"

View file

@ -10,3 +10,5 @@ itertools = "0.10.1"
sled = { version = "0.34.6", features = ["compression", "no_metrics"] } sled = { version = "0.34.6", features = ["compression", "no_metrics"] }
rusqlite = { version = "0.25.3", features = ["bundled"] } rusqlite = { version = "0.25.3", features = ["bundled"] }
anyhow = "1.0.42" anyhow = "1.0.42"
heed = { git = "https://github.com/timokoesters/heed.git", rev = "f6f825da7fb2c758867e05ad973ef800a6fe1d5d" }
thiserror = "1.0.26"

View file

@ -1,3 +1,4 @@
pub mod heed;
pub mod sled; pub mod sled;
pub mod sqlite; pub mod sqlite;
@ -8,7 +9,7 @@ pub type KVIter<'a> = Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + 'a>;
pub type TreeKVIter<'a> = Box<dyn Iterator<Item = (Vec<u8>, KVIter<'a>)> + 'a>; pub type TreeKVIter<'a> = Box<dyn Iterator<Item = (Vec<u8>, KVIter<'a>)> + 'a>;
pub trait Database { pub trait Database {
fn iter<'a>(&'a self) -> TreeKVIter<'a>; fn names<'a>(&'a self) -> Vec<Vec<u8>>;
fn segment<'a>(&'a mut self, name: Vec<u8>) -> Option<Box<dyn Segment + 'a>>; // change return type to Result fn segment<'a>(&'a mut self, name: Vec<u8>) -> Option<Box<dyn Segment + 'a>>; // change return type to Result
} }
@ -18,25 +19,42 @@ pub trait Segment {
&'a mut self, &'a mut self,
batch: Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + 'a>, batch: Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + 'a>,
) -> anyhow::Result<()>; ) -> anyhow::Result<()>;
fn get_iter<'a>(&'a mut self) -> Box<dyn SegmentIter + 'a>;
}
pub trait SegmentIter {
fn iter<'a>(&'a mut self) -> KVIter<'a>;
} }
pub fn copy_database( pub fn copy_database(
src: &impl Database, src: &mut dyn Database,
dst: &mut impl Database, dst: &mut dyn Database,
chunk_size: usize, chunk_size: usize,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
for (tree, i) in src.iter() { // todo remove unwraps
dbg!(&tree); for seg_name in src.names() {
drop(dbg!(String::from_utf8(seg_name.clone())));
let mut t = dst.segment(tree).unwrap(); // todo remove unwrap let mut src_seg = src.segment(seg_name.clone()).unwrap();
let mut dst_seg = dst.segment(seg_name).unwrap();
let mut src_seg_iter = src_seg.get_iter();
let i = src_seg_iter.iter();
let mut x: usize = 0; let mut x: usize = 0;
for chunk in &i.chunks(chunk_size) { for chunk in &i.chunks(chunk_size) {
dbg!(&x); dbg!(&x);
t.batch_insert(Box::new(chunk))?; dst_seg.batch_insert(Box::new(chunk))?;
x += chunk_size; x += chunk_size;
} }
drop(dst_seg);
drop(src_seg_iter);
} }
Ok(()) Ok(())

106
tools/iface/src/db/heed.rs Normal file
View file

@ -0,0 +1,106 @@
use super::{Database, KVIter, Segment, SegmentIter};
use heed::UntypedDatabase;
use itertools::Itertools;
use std::path::Path;
use thiserror::Error;
#[derive(Error, Debug)]
#[error("There was a problem with the connection to the heed database: {0}")]
pub struct HeedError(String);
impl From<heed::Error> for HeedError {
fn from(err: heed::Error) -> Self {
Self(err.to_string())
}
}
pub fn new_db<P: AsRef<Path>>(path: P) -> Result<heed::Env, HeedError> {
let mut env_builder = heed::EnvOpenOptions::new();
env_builder.map_size(1024 * 1024 * 1024); // 1 Terabyte
env_builder.max_readers(126);
env_builder.max_dbs(128);
Ok(env_builder.open(path)?)
}
pub struct HeedDB(heed::Env);
impl HeedDB {
pub fn new(env: heed::Env) -> Self {
Self(env)
}
}
impl Database for HeedDB {
fn segment<'a>(&'a mut self, name: Vec<u8>) -> Option<Box<dyn super::Segment + 'a>> {
let name = String::from_utf8(name).ok()?;
let db: UntypedDatabase = self.0.create_database(Some(name.as_str())).ok()?;
Some(Box::new(HeedSegment {
env: self.0.clone(),
db,
}))
}
fn names<'a>(&'a self) -> Vec<Vec<u8>> {
let db: UntypedDatabase = self.0.open_database(None).unwrap().unwrap();
let txn = self.0.read_txn().unwrap();
db.iter(&txn)
.unwrap()
.filter_map(|r| -> Option<(Vec<u8>, UntypedDatabase)> {
let (k, _) = r.ok()?;
let name = String::from_utf8(k.to_vec()).ok()?;
if let Some(db) = (self.0.open_database(Some(name.as_str()))).ok().flatten() {
Some((k.to_vec(), db))
} else {
None
}
})
.map(|(k, _)| k)
.collect_vec()
}
}
pub struct HeedSegment {
env: heed::Env,
db: heed::UntypedDatabase,
}
impl Segment for HeedSegment {
fn batch_insert<'a>(
&'a mut self,
batch: Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + 'a>,
) -> anyhow::Result<()> {
let mut txn = self.env.write_txn().unwrap();
for (k, v) in batch {
self.db.put(&mut txn, &k.as_slice(), &v.as_slice()).unwrap();
}
txn.commit().unwrap();
Ok(())
}
fn get_iter<'a>(&'a mut self) -> Box<dyn super::SegmentIter + 'a> {
todo!()
}
}
struct HeedSegmentIter<'a>(heed::RoTxn<'a>, &'a heed::UntypedDatabase);
impl SegmentIter for HeedSegmentIter<'_> {
fn iter<'a>(&'a mut self) -> KVIter<'a> {
Box::new(self.1.iter(&self.0).unwrap().filter_map(|r| {
if let Ok(t) = r {
Some((t.0.to_vec(), t.1.to_vec()))
} else {
None
}
}))
}
}

View file

@ -1,8 +1,7 @@
use std::path::Path; use super::{Database, KVIter, Segment, SegmentIter};
use itertools::Itertools;
use sled::{Batch, Config, Db, Result, Tree}; use sled::{Batch, Config, Db, Result, Tree};
use std::path::Path;
use super::{Database, KVIter, Segment, TreeKVIter};
pub fn new_db<P: AsRef<Path>>(path: P) -> Result<Db> { pub fn new_db<P: AsRef<Path>>(path: P) -> Result<Db> {
Config::default().path(path).use_compression(true).open() Config::default().path(path).use_compression(true).open()
@ -17,31 +16,12 @@ impl SledDB {
} }
impl Database for SledDB { impl Database for SledDB {
fn iter<'a>(&'a self) -> TreeKVIter<'a> { fn names<'a>(&'a self) -> Vec<Vec<u8>> {
Box::new( self.0
self.0 .tree_names()
.tree_names() .into_iter()
.into_iter() .map(|v| v.to_vec())
.map(|v| v.to_vec()) .collect_vec()
.filter_map(move |v| {
if let Ok(t) = self.0.open_tree(&v) {
Some((v, t))
} else {
None
}
})
.map(|(v, t): (Vec<u8>, Tree)| -> (Vec<u8>, KVIter<'a>) {
let i = t.into_iter().filter_map(|r| {
if let Ok(t) = r {
Some((t.0.to_vec(), t.1.to_vec()))
} else {
None
}
});
(v, Box::new(i))
}),
)
} }
fn segment(&mut self, name: Vec<u8>) -> Option<Box<dyn Segment>> { fn segment(&mut self, name: Vec<u8>) -> Option<Box<dyn Segment>> {
@ -65,4 +45,22 @@ impl Segment for Tree {
self.apply_batch(sled_batch).map_err(Into::into) self.apply_batch(sled_batch).map_err(Into::into)
} }
fn get_iter<'a>(&'a mut self) -> Box<dyn super::SegmentIter + 'a> {
Box::new(SledTreeIter(self))
}
}
struct SledTreeIter<'a>(&'a mut Tree);
impl SegmentIter for SledTreeIter<'_> {
fn iter<'a>(&'a mut self) -> KVIter<'a> {
Box::new(self.0.iter().filter_map(|r| {
if let Ok(t) = r {
Some((t.0.to_vec(), t.1.to_vec()))
} else {
None
}
}))
}
} }

View file

@ -1,8 +1,8 @@
use std::path::Path; use itertools::Itertools;
use rusqlite::{self, Connection, DatabaseName::Main, Statement};
use std::{collections::HashSet, iter::FromIterator, path::Path};
use rusqlite::{self, Connection, DatabaseName::Main}; use super::{Database, KVIter, Segment, SegmentIter};
use super::{Database, Segment};
pub fn new_conn<P: AsRef<Path>>(path: P) -> rusqlite::Result<Connection> { pub fn new_conn<P: AsRef<Path>>(path: P) -> rusqlite::Result<Connection> {
let path = path.as_ref().join("conduit.db"); let path = path.as_ref().join("conduit.db");
@ -13,37 +13,71 @@ pub fn new_conn<P: AsRef<Path>>(path: P) -> rusqlite::Result<Connection> {
Ok(conn) Ok(conn)
} }
pub struct SqliteDB(Connection); pub struct SqliteDB {
conn: Connection,
}
impl SqliteDB { const CORRECT_TABLE_SET: &[&str] = &["key", "value"];
impl<'a> SqliteDB {
pub fn new(conn: Connection) -> Self { pub fn new(conn: Connection) -> Self {
Self(conn) Self { conn }
}
fn valid_tables(&self) -> Vec<String> {
self.conn
.prepare("SELECT name FROM sqlite_master WHERE type='table'")
.unwrap()
.query_map([], |row| row.get(0))
.unwrap()
.map(|r| r.unwrap())
.filter(|a| self.test_table(a))
.collect()
}
fn test_table(&self, table: &String) -> bool {
let set: HashSet<String> = self
.conn
.prepare("SELECT name FROM pragma_table_info(?)")
.unwrap()
.query_map([table], |row| row.get(0))
.unwrap()
.map(|r| r.unwrap())
.collect();
set == HashSet::from_iter(CORRECT_TABLE_SET.iter().map(|s| s.to_string()))
} }
} }
impl Database for SqliteDB { impl Database for SqliteDB {
fn iter<'a>(&'a self) -> super::TreeKVIter<'a> { fn names<'a>(&'a self) -> Vec<Vec<u8>> {
todo!("iterate over tables, pick only tables that have columns 'key' and 'value', then iterate over that with values") self.valid_tables().into_iter().map_into().collect_vec()
} }
fn segment<'a>(&'a mut self, name: Vec<u8>) -> Option<Box<dyn Segment + 'a>> { fn segment<'a>(&'a mut self, name: Vec<u8>) -> Option<Box<dyn Segment + 'a>> {
let string = String::from_utf8(name).unwrap(); let string = String::from_utf8(name).unwrap();
// taken from src/database/abstraction/sqlite.rs // taken from src/database/abstraction/sqlite.rs
self.0.execute(format!("CREATE TABLE IF NOT EXISTS {} ( \"key\" BLOB PRIMARY KEY, \"value\" BLOB NOT NULL )", &string).as_str(), []).unwrap(); self.conn.execute(format!("CREATE TABLE IF NOT EXISTS {} ( \"key\" BLOB PRIMARY KEY, \"value\" BLOB NOT NULL )", &string).as_str(), []).unwrap();
Some(Box::new(SqliteTable(&mut self.0, string))) Some(Box::new(SqliteSegment {
conn: &mut self.conn,
name: string,
}))
} }
} }
pub struct SqliteTable<'a>(&'a mut Connection, String); pub struct SqliteSegment<'a> {
conn: &'a mut Connection,
name: String,
}
impl Segment for SqliteTable<'_> { impl Segment for SqliteSegment<'_> {
fn batch_insert( fn batch_insert(
&mut self, &mut self,
batch: Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + '_>, batch: Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + '_>,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let tx = self.0.transaction()?; let tx = self.conn.transaction()?;
let sql_s = format!("INSERT INTO {} (key, value) VALUES (?, ?)", &self.1); let sql_s = format!("INSERT INTO {} (key, value) VALUES (?, ?)", &self.name);
let sql = sql_s.as_str(); let sql = sql_s.as_str();
for (k, v) in batch { for (k, v) in batch {
@ -52,4 +86,25 @@ impl Segment for SqliteTable<'_> {
tx.commit().map_err(Into::into) tx.commit().map_err(Into::into)
} }
fn get_iter(&mut self) -> Box<dyn super::SegmentIter + '_> {
Box::new(SqliteSegmentIter(
self.conn
.prepare(format!("SELECT key, value FROM {}", self.name).as_str())
.unwrap(),
))
}
}
struct SqliteSegmentIter<'a>(Statement<'a>);
impl SegmentIter for SqliteSegmentIter<'_> {
fn iter<'f>(&'f mut self) -> KVIter<'f> {
Box::new(
self.0
.query_map([], |row| Ok((row.get_unwrap(0), row.get_unwrap(1))))
.unwrap()
.map(|r| r.unwrap()),
)
}
} }

12
tools/migrate/Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "conduit_migrate"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap = "2.33.3"
anyhow = "1.0.41"
conduit_iface = { path = "../iface/" }
thiserror = "1.0.26"

126
tools/migrate/src/main.rs Normal file
View file

@ -0,0 +1,126 @@
use clap::{App, Arg};
use conduit_iface::db::{self, copy_database, heed::HeedDB, sled::SledDB, sqlite::SqliteDB};
use std::{
ops::{Deref, DerefMut},
path::{Path, PathBuf},
};
enum Database {
Sled(SledDB),
Sqlite(SqliteDB),
Heed(HeedDB),
}
impl Database {
fn new(name: &str, path: PathBuf) -> anyhow::Result<Self> {
Ok(match name {
"sled" => Self::Sled(SledDB::new(db::sled::new_db(path)?)),
"heed" => Self::Heed(HeedDB::new(db::heed::new_db(path)?)),
"sqlite" => Self::Sqlite(SqliteDB::new(db::sqlite::new_conn(path)?)),
_ => panic!("unknown database type: {}", name),
})
}
}
impl Deref for Database {
type Target = dyn db::Database;
fn deref(&self) -> &Self::Target {
match self {
Database::Sled(db) => db,
Database::Sqlite(db) => db,
Database::Heed(db) => db,
}
}
}
impl DerefMut for Database {
fn deref_mut(&mut self) -> &mut Self::Target {
match self {
Database::Sled(db) => db,
Database::Sqlite(db) => db,
Database::Heed(db) => db,
}
}
}
const DATABASES: &[&str] = &["heed", "sqlite", "sled"];
fn main() -> anyhow::Result<()> {
let matches = App::new("Conduit Sled to Sqlite Migrator")
.arg(
Arg::with_name("from_dir")
.short("s")
.long("from-dir")
.takes_value(true)
.long_help("Sets the directory to grab the database from\nWill default to \".\""),
)
.arg(
Arg::with_name("to_dir")
.short("d")
.long("to-dir")
.takes_value(true)
.long_help("Sets the destination directory\nWill default to from_dir"),
)
.arg(
Arg::with_name("from")
.short("f")
.long("from")
.long_help(
format!(
"The type of database to convert from\nExample: {}",
DATABASES.join(", ")
)
.as_str(),
)
.takes_value(true)
.required(true),
)
.arg(
Arg::with_name("to")
.short("t")
.long("to")
.long_help(
format!(
"The type of database to convert to\nExample: {}",
DATABASES.join(", ")
)
.as_str(),
)
.takes_value(true)
.required(true),
)
.get_matches();
let src_dir = matches.value_of("from_dir").unwrap_or(".");
let dst_dir = matches.value_of("to_dir");
let src_dir = Path::new(src_dir).canonicalize()?;
if !src_dir.is_dir() {
return Err(anyhow::anyhow!("source path must be directory"));
}
let dst_dir = match dst_dir {
None => Ok(src_dir.clone()),
Some(dir) => {
let p = Path::new(dir).canonicalize()?;
if !p.is_dir() {
Err(anyhow::anyhow!("destination path must be directory"))
} else {
Ok(p)
}
}
}?;
dbg!(&src_dir, &dst_dir);
let mut src_db = Database::new(matches.value_of("from").unwrap(), src_dir)?;
let mut dst_db = Database::new(matches.value_of("to").unwrap(), dst_dir)?;
copy_database(&mut *src_db, &mut *dst_db, 1000)?;
Ok(())
}

View file

@ -42,11 +42,11 @@ fn main() -> anyhow::Result<()> {
dbg!(&source_dir, &dest_dir); dbg!(&source_dir, &dest_dir);
let sled = sled::SledDB::new(sled::new_db(source_dir)?); let mut sled = sled::SledDB::new(sled::new_db(source_dir)?);
let mut sqlite = sqlite::SqliteDB::new(sqlite::new_conn(dest_dir)?); let mut sqlite = sqlite::SqliteDB::new(sqlite::new_conn(dest_dir)?);
copy_database(&sled, &mut sqlite, 1000)?; copy_database(&mut sled, &mut sqlite, 1000)?;
Ok(()) Ok(())
} }