mirror of
https://github.com/ShadowJonathan/conduit_toolbox.git
synced 2025-07-25 21:04:08 +03:00
refactor, add heed, add migrate tool
This commit is contained in:
parent
03305cd144
commit
df8d3c95de
9 changed files with 673 additions and 62 deletions
|
@ -10,3 +10,5 @@ itertools = "0.10.1"
|
|||
sled = { version = "0.34.6", features = ["compression", "no_metrics"] }
|
||||
rusqlite = { version = "0.25.3", features = ["bundled"] }
|
||||
anyhow = "1.0.42"
|
||||
heed = { git = "https://github.com/timokoesters/heed.git", rev = "f6f825da7fb2c758867e05ad973ef800a6fe1d5d" }
|
||||
thiserror = "1.0.26"
|
|
@ -1,3 +1,4 @@
|
|||
pub mod heed;
|
||||
pub mod sled;
|
||||
pub mod sqlite;
|
||||
|
||||
|
@ -8,7 +9,7 @@ pub type KVIter<'a> = Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + 'a>;
|
|||
pub type TreeKVIter<'a> = Box<dyn Iterator<Item = (Vec<u8>, KVIter<'a>)> + 'a>;
|
||||
|
||||
pub trait Database {
|
||||
fn iter<'a>(&'a self) -> TreeKVIter<'a>;
|
||||
fn names<'a>(&'a self) -> Vec<Vec<u8>>;
|
||||
|
||||
fn segment<'a>(&'a mut self, name: Vec<u8>) -> Option<Box<dyn Segment + 'a>>; // change return type to Result
|
||||
}
|
||||
|
@ -18,25 +19,42 @@ pub trait Segment {
|
|||
&'a mut self,
|
||||
batch: Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + 'a>,
|
||||
) -> anyhow::Result<()>;
|
||||
|
||||
fn get_iter<'a>(&'a mut self) -> Box<dyn SegmentIter + 'a>;
|
||||
}
|
||||
|
||||
pub trait SegmentIter {
|
||||
fn iter<'a>(&'a mut self) -> KVIter<'a>;
|
||||
}
|
||||
|
||||
pub fn copy_database(
|
||||
src: &impl Database,
|
||||
dst: &mut impl Database,
|
||||
src: &mut dyn Database,
|
||||
dst: &mut dyn Database,
|
||||
chunk_size: usize,
|
||||
) -> anyhow::Result<()> {
|
||||
for (tree, i) in src.iter() {
|
||||
dbg!(&tree);
|
||||
// todo remove unwraps
|
||||
for seg_name in src.names() {
|
||||
drop(dbg!(String::from_utf8(seg_name.clone())));
|
||||
|
||||
let mut t = dst.segment(tree).unwrap(); // todo remove unwrap
|
||||
let mut src_seg = src.segment(seg_name.clone()).unwrap();
|
||||
|
||||
let mut dst_seg = dst.segment(seg_name).unwrap();
|
||||
|
||||
let mut src_seg_iter = src_seg.get_iter();
|
||||
|
||||
let i = src_seg_iter.iter();
|
||||
|
||||
let mut x: usize = 0;
|
||||
|
||||
for chunk in &i.chunks(chunk_size) {
|
||||
dbg!(&x);
|
||||
t.batch_insert(Box::new(chunk))?;
|
||||
dst_seg.batch_insert(Box::new(chunk))?;
|
||||
x += chunk_size;
|
||||
}
|
||||
|
||||
drop(dst_seg);
|
||||
|
||||
drop(src_seg_iter);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
106
tools/iface/src/db/heed.rs
Normal file
106
tools/iface/src/db/heed.rs
Normal file
|
@ -0,0 +1,106 @@
|
|||
use super::{Database, KVIter, Segment, SegmentIter};
|
||||
use heed::UntypedDatabase;
|
||||
use itertools::Itertools;
|
||||
use std::path::Path;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
#[error("There was a problem with the connection to the heed database: {0}")]
|
||||
pub struct HeedError(String);
|
||||
|
||||
impl From<heed::Error> for HeedError {
|
||||
fn from(err: heed::Error) -> Self {
|
||||
Self(err.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_db<P: AsRef<Path>>(path: P) -> Result<heed::Env, HeedError> {
|
||||
let mut env_builder = heed::EnvOpenOptions::new();
|
||||
env_builder.map_size(1024 * 1024 * 1024); // 1 Terabyte
|
||||
env_builder.max_readers(126);
|
||||
env_builder.max_dbs(128);
|
||||
|
||||
Ok(env_builder.open(path)?)
|
||||
}
|
||||
|
||||
pub struct HeedDB(heed::Env);
|
||||
|
||||
impl HeedDB {
|
||||
pub fn new(env: heed::Env) -> Self {
|
||||
Self(env)
|
||||
}
|
||||
}
|
||||
|
||||
impl Database for HeedDB {
|
||||
fn segment<'a>(&'a mut self, name: Vec<u8>) -> Option<Box<dyn super::Segment + 'a>> {
|
||||
let name = String::from_utf8(name).ok()?;
|
||||
|
||||
let db: UntypedDatabase = self.0.create_database(Some(name.as_str())).ok()?;
|
||||
|
||||
Some(Box::new(HeedSegment {
|
||||
env: self.0.clone(),
|
||||
db,
|
||||
}))
|
||||
}
|
||||
|
||||
fn names<'a>(&'a self) -> Vec<Vec<u8>> {
|
||||
let db: UntypedDatabase = self.0.open_database(None).unwrap().unwrap();
|
||||
|
||||
let txn = self.0.read_txn().unwrap();
|
||||
|
||||
db.iter(&txn)
|
||||
.unwrap()
|
||||
.filter_map(|r| -> Option<(Vec<u8>, UntypedDatabase)> {
|
||||
let (k, _) = r.ok()?;
|
||||
|
||||
let name = String::from_utf8(k.to_vec()).ok()?;
|
||||
|
||||
if let Some(db) = (self.0.open_database(Some(name.as_str()))).ok().flatten() {
|
||||
Some((k.to_vec(), db))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.map(|(k, _)| k)
|
||||
.collect_vec()
|
||||
}
|
||||
}
|
||||
pub struct HeedSegment {
|
||||
env: heed::Env,
|
||||
db: heed::UntypedDatabase,
|
||||
}
|
||||
|
||||
impl Segment for HeedSegment {
|
||||
fn batch_insert<'a>(
|
||||
&'a mut self,
|
||||
batch: Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + 'a>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut txn = self.env.write_txn().unwrap();
|
||||
|
||||
for (k, v) in batch {
|
||||
self.db.put(&mut txn, &k.as_slice(), &v.as_slice()).unwrap();
|
||||
}
|
||||
|
||||
txn.commit().unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_iter<'a>(&'a mut self) -> Box<dyn super::SegmentIter + 'a> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
struct HeedSegmentIter<'a>(heed::RoTxn<'a>, &'a heed::UntypedDatabase);
|
||||
|
||||
impl SegmentIter for HeedSegmentIter<'_> {
|
||||
fn iter<'a>(&'a mut self) -> KVIter<'a> {
|
||||
Box::new(self.1.iter(&self.0).unwrap().filter_map(|r| {
|
||||
if let Ok(t) = r {
|
||||
Some((t.0.to_vec(), t.1.to_vec()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}))
|
||||
}
|
||||
}
|
|
@ -1,8 +1,7 @@
|
|||
use std::path::Path;
|
||||
|
||||
use super::{Database, KVIter, Segment, SegmentIter};
|
||||
use itertools::Itertools;
|
||||
use sled::{Batch, Config, Db, Result, Tree};
|
||||
|
||||
use super::{Database, KVIter, Segment, TreeKVIter};
|
||||
use std::path::Path;
|
||||
|
||||
pub fn new_db<P: AsRef<Path>>(path: P) -> Result<Db> {
|
||||
Config::default().path(path).use_compression(true).open()
|
||||
|
@ -17,31 +16,12 @@ impl SledDB {
|
|||
}
|
||||
|
||||
impl Database for SledDB {
|
||||
fn iter<'a>(&'a self) -> TreeKVIter<'a> {
|
||||
Box::new(
|
||||
self.0
|
||||
.tree_names()
|
||||
.into_iter()
|
||||
.map(|v| v.to_vec())
|
||||
.filter_map(move |v| {
|
||||
if let Ok(t) = self.0.open_tree(&v) {
|
||||
Some((v, t))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.map(|(v, t): (Vec<u8>, Tree)| -> (Vec<u8>, KVIter<'a>) {
|
||||
let i = t.into_iter().filter_map(|r| {
|
||||
if let Ok(t) = r {
|
||||
Some((t.0.to_vec(), t.1.to_vec()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
(v, Box::new(i))
|
||||
}),
|
||||
)
|
||||
fn names<'a>(&'a self) -> Vec<Vec<u8>> {
|
||||
self.0
|
||||
.tree_names()
|
||||
.into_iter()
|
||||
.map(|v| v.to_vec())
|
||||
.collect_vec()
|
||||
}
|
||||
|
||||
fn segment(&mut self, name: Vec<u8>) -> Option<Box<dyn Segment>> {
|
||||
|
@ -65,4 +45,22 @@ impl Segment for Tree {
|
|||
|
||||
self.apply_batch(sled_batch).map_err(Into::into)
|
||||
}
|
||||
|
||||
fn get_iter<'a>(&'a mut self) -> Box<dyn super::SegmentIter + 'a> {
|
||||
Box::new(SledTreeIter(self))
|
||||
}
|
||||
}
|
||||
|
||||
struct SledTreeIter<'a>(&'a mut Tree);
|
||||
|
||||
impl SegmentIter for SledTreeIter<'_> {
|
||||
fn iter<'a>(&'a mut self) -> KVIter<'a> {
|
||||
Box::new(self.0.iter().filter_map(|r| {
|
||||
if let Ok(t) = r {
|
||||
Some((t.0.to_vec(), t.1.to_vec()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
use std::path::Path;
|
||||
use itertools::Itertools;
|
||||
use rusqlite::{self, Connection, DatabaseName::Main, Statement};
|
||||
use std::{collections::HashSet, iter::FromIterator, path::Path};
|
||||
|
||||
use rusqlite::{self, Connection, DatabaseName::Main};
|
||||
|
||||
use super::{Database, Segment};
|
||||
use super::{Database, KVIter, Segment, SegmentIter};
|
||||
|
||||
pub fn new_conn<P: AsRef<Path>>(path: P) -> rusqlite::Result<Connection> {
|
||||
let path = path.as_ref().join("conduit.db");
|
||||
|
@ -13,37 +13,71 @@ pub fn new_conn<P: AsRef<Path>>(path: P) -> rusqlite::Result<Connection> {
|
|||
Ok(conn)
|
||||
}
|
||||
|
||||
pub struct SqliteDB(Connection);
|
||||
pub struct SqliteDB {
|
||||
conn: Connection,
|
||||
}
|
||||
|
||||
impl SqliteDB {
|
||||
const CORRECT_TABLE_SET: &[&str] = &["key", "value"];
|
||||
|
||||
impl<'a> SqliteDB {
|
||||
pub fn new(conn: Connection) -> Self {
|
||||
Self(conn)
|
||||
Self { conn }
|
||||
}
|
||||
|
||||
fn valid_tables(&self) -> Vec<String> {
|
||||
self.conn
|
||||
.prepare("SELECT name FROM sqlite_master WHERE type='table'")
|
||||
.unwrap()
|
||||
.query_map([], |row| row.get(0))
|
||||
.unwrap()
|
||||
.map(|r| r.unwrap())
|
||||
.filter(|a| self.test_table(a))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn test_table(&self, table: &String) -> bool {
|
||||
let set: HashSet<String> = self
|
||||
.conn
|
||||
.prepare("SELECT name FROM pragma_table_info(?)")
|
||||
.unwrap()
|
||||
.query_map([table], |row| row.get(0))
|
||||
.unwrap()
|
||||
.map(|r| r.unwrap())
|
||||
.collect();
|
||||
|
||||
set == HashSet::from_iter(CORRECT_TABLE_SET.iter().map(|s| s.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
impl Database for SqliteDB {
|
||||
fn iter<'a>(&'a self) -> super::TreeKVIter<'a> {
|
||||
todo!("iterate over tables, pick only tables that have columns 'key' and 'value', then iterate over that with values")
|
||||
fn names<'a>(&'a self) -> Vec<Vec<u8>> {
|
||||
self.valid_tables().into_iter().map_into().collect_vec()
|
||||
}
|
||||
|
||||
fn segment<'a>(&'a mut self, name: Vec<u8>) -> Option<Box<dyn Segment + 'a>> {
|
||||
let string = String::from_utf8(name).unwrap();
|
||||
// taken from src/database/abstraction/sqlite.rs
|
||||
self.0.execute(format!("CREATE TABLE IF NOT EXISTS {} ( \"key\" BLOB PRIMARY KEY, \"value\" BLOB NOT NULL )", &string).as_str(), []).unwrap();
|
||||
self.conn.execute(format!("CREATE TABLE IF NOT EXISTS {} ( \"key\" BLOB PRIMARY KEY, \"value\" BLOB NOT NULL )", &string).as_str(), []).unwrap();
|
||||
|
||||
Some(Box::new(SqliteTable(&mut self.0, string)))
|
||||
Some(Box::new(SqliteSegment {
|
||||
conn: &mut self.conn,
|
||||
name: string,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SqliteTable<'a>(&'a mut Connection, String);
|
||||
pub struct SqliteSegment<'a> {
|
||||
conn: &'a mut Connection,
|
||||
name: String,
|
||||
}
|
||||
|
||||
impl Segment for SqliteTable<'_> {
|
||||
impl Segment for SqliteSegment<'_> {
|
||||
fn batch_insert(
|
||||
&mut self,
|
||||
batch: Box<dyn Iterator<Item = (Vec<u8>, Vec<u8>)> + '_>,
|
||||
) -> anyhow::Result<()> {
|
||||
let tx = self.0.transaction()?;
|
||||
let sql_s = format!("INSERT INTO {} (key, value) VALUES (?, ?)", &self.1);
|
||||
let tx = self.conn.transaction()?;
|
||||
let sql_s = format!("INSERT INTO {} (key, value) VALUES (?, ?)", &self.name);
|
||||
let sql = sql_s.as_str();
|
||||
|
||||
for (k, v) in batch {
|
||||
|
@ -52,4 +86,25 @@ impl Segment for SqliteTable<'_> {
|
|||
|
||||
tx.commit().map_err(Into::into)
|
||||
}
|
||||
|
||||
fn get_iter(&mut self) -> Box<dyn super::SegmentIter + '_> {
|
||||
Box::new(SqliteSegmentIter(
|
||||
self.conn
|
||||
.prepare(format!("SELECT key, value FROM {}", self.name).as_str())
|
||||
.unwrap(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
struct SqliteSegmentIter<'a>(Statement<'a>);
|
||||
|
||||
impl SegmentIter for SqliteSegmentIter<'_> {
|
||||
fn iter<'f>(&'f mut self) -> KVIter<'f> {
|
||||
Box::new(
|
||||
self.0
|
||||
.query_map([], |row| Ok((row.get_unwrap(0), row.get_unwrap(1))))
|
||||
.unwrap()
|
||||
.map(|r| r.unwrap()),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
12
tools/migrate/Cargo.toml
Normal file
12
tools/migrate/Cargo.toml
Normal file
|
@ -0,0 +1,12 @@
|
|||
[package]
|
||||
name = "conduit_migrate"
|
||||
version = "0.1.0"
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
clap = "2.33.3"
|
||||
anyhow = "1.0.41"
|
||||
conduit_iface = { path = "../iface/" }
|
||||
thiserror = "1.0.26"
|
126
tools/migrate/src/main.rs
Normal file
126
tools/migrate/src/main.rs
Normal file
|
@ -0,0 +1,126 @@
|
|||
use clap::{App, Arg};
|
||||
use conduit_iface::db::{self, copy_database, heed::HeedDB, sled::SledDB, sqlite::SqliteDB};
|
||||
use std::{
|
||||
ops::{Deref, DerefMut},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
enum Database {
|
||||
Sled(SledDB),
|
||||
Sqlite(SqliteDB),
|
||||
Heed(HeedDB),
|
||||
}
|
||||
|
||||
impl Database {
|
||||
fn new(name: &str, path: PathBuf) -> anyhow::Result<Self> {
|
||||
Ok(match name {
|
||||
"sled" => Self::Sled(SledDB::new(db::sled::new_db(path)?)),
|
||||
"heed" => Self::Heed(HeedDB::new(db::heed::new_db(path)?)),
|
||||
"sqlite" => Self::Sqlite(SqliteDB::new(db::sqlite::new_conn(path)?)),
|
||||
_ => panic!("unknown database type: {}", name),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for Database {
|
||||
type Target = dyn db::Database;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
match self {
|
||||
Database::Sled(db) => db,
|
||||
Database::Sqlite(db) => db,
|
||||
Database::Heed(db) => db,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for Database {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
match self {
|
||||
Database::Sled(db) => db,
|
||||
Database::Sqlite(db) => db,
|
||||
Database::Heed(db) => db,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const DATABASES: &[&str] = &["heed", "sqlite", "sled"];
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let matches = App::new("Conduit Sled to Sqlite Migrator")
|
||||
.arg(
|
||||
Arg::with_name("from_dir")
|
||||
.short("s")
|
||||
.long("from-dir")
|
||||
.takes_value(true)
|
||||
.long_help("Sets the directory to grab the database from\nWill default to \".\""),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("to_dir")
|
||||
.short("d")
|
||||
.long("to-dir")
|
||||
.takes_value(true)
|
||||
.long_help("Sets the destination directory\nWill default to from_dir"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("from")
|
||||
.short("f")
|
||||
.long("from")
|
||||
.long_help(
|
||||
format!(
|
||||
"The type of database to convert from\nExample: {}",
|
||||
DATABASES.join(", ")
|
||||
)
|
||||
.as_str(),
|
||||
)
|
||||
.takes_value(true)
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("to")
|
||||
.short("t")
|
||||
.long("to")
|
||||
.long_help(
|
||||
format!(
|
||||
"The type of database to convert to\nExample: {}",
|
||||
DATABASES.join(", ")
|
||||
)
|
||||
.as_str(),
|
||||
)
|
||||
.takes_value(true)
|
||||
.required(true),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let src_dir = matches.value_of("from_dir").unwrap_or(".");
|
||||
|
||||
let dst_dir = matches.value_of("to_dir");
|
||||
|
||||
let src_dir = Path::new(src_dir).canonicalize()?;
|
||||
|
||||
if !src_dir.is_dir() {
|
||||
return Err(anyhow::anyhow!("source path must be directory"));
|
||||
}
|
||||
|
||||
let dst_dir = match dst_dir {
|
||||
None => Ok(src_dir.clone()),
|
||||
Some(dir) => {
|
||||
let p = Path::new(dir).canonicalize()?;
|
||||
if !p.is_dir() {
|
||||
Err(anyhow::anyhow!("destination path must be directory"))
|
||||
} else {
|
||||
Ok(p)
|
||||
}
|
||||
}
|
||||
}?;
|
||||
|
||||
dbg!(&src_dir, &dst_dir);
|
||||
|
||||
let mut src_db = Database::new(matches.value_of("from").unwrap(), src_dir)?;
|
||||
|
||||
let mut dst_db = Database::new(matches.value_of("to").unwrap(), dst_dir)?;
|
||||
|
||||
copy_database(&mut *src_db, &mut *dst_db, 1000)?;
|
||||
|
||||
Ok(())
|
||||
}
|
|
@ -42,11 +42,11 @@ fn main() -> anyhow::Result<()> {
|
|||
|
||||
dbg!(&source_dir, &dest_dir);
|
||||
|
||||
let sled = sled::SledDB::new(sled::new_db(source_dir)?);
|
||||
let mut sled = sled::SledDB::new(sled::new_db(source_dir)?);
|
||||
|
||||
let mut sqlite = sqlite::SqliteDB::new(sqlite::new_conn(dest_dir)?);
|
||||
|
||||
copy_database(&sled, &mut sqlite, 1000)?;
|
||||
copy_database(&mut sled, &mut sqlite, 1000)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue