morethantext-web/src/morethantext/mod-old.rs

824 lines
25 KiB
Rust

mod cache;
mod database;
pub mod error;
mod store;
use async_std::{
fs::{create_dir, read, remove_file, write},
path::Path,
sync::{Arc, Mutex},
task::{sleep, spawn},
};
use database::Database;
use error::{DBError, ErrorCode};
use rand::{distributions::Alphanumeric, thread_rng, Rng};
use std::{
collections::HashMap,
fmt, slice, str,
time::{Duration, Instant},
};
use store::Store;
/// Name of the subdirectory that `MoreThanText::new` creates under the directory it is given;
/// cache entry files are written inside it.
const DATA: &str = "data";
/// Name of the file, directly under the directory given to `MoreThanText::new`, that holds the
/// id of the initial `DBMap` entry.
const ENTRY: &str = "databases";
/// Conversion between a value and a byte representation.
///
/// `F` is the type produced by `from_bytes`.
// NOTE(review): no implementation is visible in this part of the file; presumably the byte
// form is the on-disk format — confirm against the implementors.
trait FileData<F> {
/// Serializes `self` into a byte vector.
fn to_bytes(&self) -> Vec<u8>;
/// Builds an `F` from the bytes yielded by `data`, or returns a `DBError`.
fn from_bytes(data: &mut slice::Iter<u8>) -> Result<F, DBError>;
}
/// String-keyed operations that each return a list of strings or a `DBError`.
// NOTE(review): no implementation is visible in this part of the file, so the meaning of the
// returned strings (presumably entry ids) and of each argument must be confirmed there.
trait SessionData {
/// Adds `data` under the given `key` / `value` pair.
fn add(&mut self, key: &str, value: &str, data: &str) -> Result<Vec<String>, DBError>;
/// Looks up by `key` and `value`; presumably an equality match — TODO confirm.
fn eq(&self, key: &str, value: &str) -> Result<Vec<String>, DBError>;
/// Lists results for the given `keys`.
fn list(&self, keys: Vec<&str>) -> Result<Vec<String>, DBError>;
}
/// The kinds of value that can be held in the cache and written to an entry file.
#[derive(Clone)]
pub enum CacheType {
/// Plain text; the only variant whose payload is included in the serialized bytes.
Raw(String),
/// A `Store`; only the type header is serialized, and deserializing yields `Store::new()`.
DBMap(Store),
/// A variant without any payload.
TableMap,
}
impl CacheType {
    /// Returns the variant name, which is also the header written by [`CacheType::to_bytes`].
    pub fn entry_type(&self) -> String {
        match self {
            CacheType::Raw(_) => "Raw".to_string(),
            CacheType::DBMap(_) => "DBMap".to_string(),
            CacheType::TableMap => "TableMap".to_string(),
        }
    }

    /// Serializes the value as `<entry_type> 0x00 <payload>`.
    ///
    /// Only `Raw` has a payload (its UTF-8 text); `DBMap` and `TableMap` end after the
    /// NUL separator.
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut output = self.entry_type().into_bytes();
        output.push(0);
        match self {
            CacheType::Raw(s) => output.extend_from_slice(s.as_bytes()),
            CacheType::DBMap(_) | CacheType::TableMap => (),
        }
        output
    }

    /// Parses bytes produced by [`CacheType::to_bytes`].
    ///
    /// # Errors
    ///
    /// * `"empty file"` when `data` is empty.
    /// * `"incomplete file"` when no NUL separator follows the header.
    /// * `"data corruption"` when the header is not a known type name, or when the header
    ///   or a `Raw` payload is not valid UTF-8 (previously this case panicked).
    pub fn from_bytes(data: Vec<u8>) -> Result<CacheType, DBError> {
        if data.is_empty() {
            return Err(DBError::new("empty file"));
        }
        // The header runs up to, but not including, the first NUL byte.
        let split = data
            .iter()
            .position(|&byte| byte == 0)
            .ok_or_else(|| DBError::new("incomplete file"))?;
        let (header, body) = (&data[..split], &data[split + 1..]);
        let header = str::from_utf8(header).map_err(|_| DBError::new("data corruption"))?;
        match header {
            "Raw" => {
                let text = str::from_utf8(body).map_err(|_| DBError::new("data corruption"))?;
                Ok(CacheType::Raw(text.to_string()))
            }
            "DBMap" => Ok(CacheType::DBMap(Store::new())),
            "TableMap" => Ok(CacheType::TableMap),
            _ => Err(DBError::new("data corruption")),
        }
    }
}
impl fmt::Display for CacheType {
    /// Writes the text of a `Raw` value.
    ///
    /// # Panics
    ///
    /// `DBMap` and `TableMap` have no text form yet and hit `todo!()`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            CacheType::Raw(text) => f.write_str(text),
            CacheType::DBMap(_) | CacheType::TableMap => todo!(),
        }
    }
}
/// A cached value paired with the moment it was last used.
#[derive(Clone)]
struct CacheEntry {
/// The cached value.
data: CacheType,
/// Moment of the most recent creation, `touch` or `update`; `elapsed` measures from here.
last_used: Instant,
}
impl CacheEntry {
    /// Wraps `data` in an entry stamped with the current time.
    fn new(data: CacheType) -> Self {
        let last_used = Instant::now();
        Self { data, last_used }
    }

    /// Time that has passed since the entry was last used.
    fn elapsed(&self) -> Duration {
        Instant::now().duration_since(self.last_used)
    }

    /// Marks the entry as used right now.
    fn touch(&mut self) {
        self.last_used = Instant::now();
    }

    /// Replaces the stored value and marks the entry as used right now.
    fn update(&mut self, data: CacheType) {
        *self = Self::new(data);
    }
}
impl fmt::Display for CacheEntry {
    /// Displays the entry exactly as its inner `CacheType` displays.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("{}", self.data))
    }
}
/// Handle to a file-backed store with an in-memory cache.
///
/// Cloning the handle shares the same cache, because the map lives behind an `Arc`.
#[derive(Clone)]
pub struct MoreThanText {
/// Entries currently held in memory, keyed by entry id.
cache: Arc<Mutex<HashMap<String, CacheEntry>>>,
/// Path of the data directory in which entry files are stored.
dir: String,
/// Ids associated with this handle; `new` puts the entry-point id here.
session: Vec<String>,
}
impl MoreThanText {
pub async fn new(dir: &str) -> Result<Self, DBError> {
let data_dir = Path::new(dir).join(DATA);
if !data_dir.is_dir().await {
match create_dir(&data_dir).await {
Ok(_) => (),
Err(err) => {
let mut error = DBError::new("failed to create data directory");
error.add_source(err);
return Err(error);
}
}
}
let mut output = Self {
cache: Arc::new(Mutex::new(HashMap::new())),
dir: data_dir.to_str().unwrap().to_string(),
session: Vec::new(),
};
let entry_file = Path::new(dir).join(ENTRY);
let id: String;
if entry_file.is_file().await {
let holder = read(entry_file).await.unwrap();
id = str::from_utf8(&holder).unwrap().to_string();
} else {
id = output
.add_entry(CacheType::DBMap(Store::new()))
.await
.unwrap();
write(entry_file, id.as_bytes()).await.unwrap();
}
output.session.push(id);
let looper = output.cache.clone();
spawn(async move {
let hold_time = Duration::from_secs(300);
loop {
sleep(Duration::from_secs(1)).await;
let mut ids: Vec<String> = Vec::new();
let mut cache = looper.lock().await;
for (id, entry) in cache.iter() {
if entry.elapsed() > hold_time {
ids.push(id.to_string());
}
}
for id in ids.iter() {
cache.remove(id);
}
}
});
Ok(output)
}
fn filename(&self, id: &str) -> String {
let filename = Path::new(&self.dir).join(&id);
filename.into_os_string().into_string().unwrap()
}
fn new_id(&self) -> String {
thread_rng().sample_iter(&Alphanumeric).take(64).collect()
}
async fn add(&self, feature: &str, key: &str, value: &str) -> Self {
let mut ids: Vec<String> = Vec::new();
for id in self.session.clone().into_iter() {
let holder = self.get_entry(&id).await.unwrap();
//holder.add(feature, key, value);
}
let mut output = self.clone();
output.session.clear();
output.session.push(value.to_string());
output
}
async fn list(&self, feature: Vec<&str>) -> Result<Vec<String>, DBError> {
Ok(Vec::new())
}
async fn add_entry(&self, entry: CacheType) -> Result<String, DBError> {
let mut id: String = "".to_string();
let mut dup = true;
while dup {
id = thread_rng().sample_iter(&Alphanumeric).take(32).collect();
dup = Path::new(&self.dir).join(&id).as_path().exists().await;
}
match write(Path::new(&self.filename(&id)), entry.to_bytes()).await {
Ok(_) => (),
Err(err) => {
let mut error = DBError::new("data write");
error.add_source(err);
return Err(error);
}
};
let mut cache = self.cache.lock().await;
let data = CacheEntry::new(entry);
cache.insert(id.clone(), data);
Ok(id)
}
async fn get_entry(&self, id: &str) -> Result<CacheEntry, DBError> {
let mut cache = self.cache.lock().await;
match cache.get_mut(id) {
Some(entry) => {
entry.touch();
Ok(entry.clone())
}
None => match read(Path::new(&self.filename(id))).await {
Ok(content) => {
let data = CacheEntry::new(CacheType::from_bytes(content).unwrap());
cache.insert(id.to_string(), data.clone());
Ok(data)
}
Err(_) => Err(DBError::new("cache entry not found")),
},
}
}
async fn update_entry(&self, id: &str, entry: CacheType) -> Result<(), DBError> {
match self.get_entry(id).await {
Ok(_) => (),
Err(err) => return Err(err),
}
match write(Path::new(&self.filename(id)), entry.to_bytes()).await {
Ok(_) => (),
Err(err) => {
let mut error = DBError::new("data write");
error.add_source(err);
return Err(error);
}
}
let mut cache = self.cache.lock().await;
let data = CacheEntry::new(entry);
cache.insert(id.to_string(), data);
Ok(())
}
async fn delete_entry(&self, id: &str) -> Result<(), DBError> {
let mut cache = self.cache.lock().await;
cache.remove(id);
match remove_file(Path::new(&self.filename(id))).await {
Ok(_) => Ok(()),
Err(err) => {
let mut error = DBError::new("data delete");
error.add_source(err);
Err(error)
}
}
}
}
#[cfg(test)]
mod setup {
    use super::*;
    use async_std::fs::remove_dir_all;
    use tempfile::{tempdir, TempDir};

    /// Test fixture: a `MoreThanText` rooted in a temporary directory.
    ///
    /// The `TempDir` is kept alongside the database so the directory outlives the test body.
    pub struct MTT {
        pub db: MoreThanText,
        pub dir: TempDir,
    }

    impl MTT {
        /// Creates a temporary directory and opens a database inside it.
        pub async fn new() -> Self {
            let dir = tempdir().unwrap();
            let root = dir.path().to_str().unwrap();
            let db = MoreThanText::new(root).await.unwrap();
            Self { db, dir }
        }

        /// Deletes the data directory so that later file operations fail.
        pub async fn create_io_error(&self) {
            let data_dir = self.dir.path().join(DATA);
            remove_dir_all(data_dir).await.unwrap();
        }
    }
}
#[cfg(test)]
mod init {
    use super::*;
    use std::error::Error;
    use tempfile::tempdir;

    /// Opening a store in an empty directory creates the data directory.
    #[async_std::test]
    async fn create_data_dir() {
        let dir = tempdir().unwrap();
        let root = dir.path().to_str().unwrap();
        MoreThanText::new(root).await.unwrap();
        let data_dir = dir.path().join(DATA);
        assert!(data_dir.is_dir(), "Did not create the data directory.");
        dir.close().unwrap();
    }

    /// Opening a store whose data directory already exists succeeds.
    #[async_std::test]
    async fn existing_data_dir() {
        let dir = tempdir().unwrap();
        create_dir(dir.path().join(DATA)).await.unwrap();
        let root = dir.path().to_str().unwrap();
        MoreThanText::new(root).await.unwrap();
        dir.close().unwrap();
    }

    /// A root directory that does not exist yields an error that carries its I/O source.
    #[async_std::test]
    async fn bad_data_dir() {
        let msg = "could not create directory";
        let err = match MoreThanText::new("kljsdgfhslkfrh").await {
            Ok(_) => panic!("This test should fail to create a data directory"),
            Err(err) => err,
        };
        assert_eq!(err.to_string(), "failed to create data directory");
        assert!(err.source().is_some(), "Must include the source error.");
        let err_msg = err.source().unwrap().to_string();
        assert!(err_msg.contains(msg), "'{}' not in '{}'", msg, err_msg);
    }

    /// A new store writes an entry-point file naming a `DBMap` entry and starts its
    /// session with that id.
    #[async_std::test]
    async fn creates_entry_point() {
        let dir = tempdir().unwrap();
        let root = dir.path().to_str().unwrap();
        let db = MoreThanText::new(root).await.unwrap();
        let entry = dir.path().join(ENTRY);
        assert!(entry.is_file(), "Did not create entry point file.");
        let data = read(entry).await.unwrap();
        let id = str::from_utf8(&data).unwrap();
        let cache = db.get_entry(id).await.unwrap();
        assert_eq!(cache.data.entry_type(), "DBMap");
        assert_eq!(db.session, [id]);
    }

    /// Opening the same directory twice reuses the stored entry point.
    #[async_std::test]
    async fn use_existing_entry_point() {
        let dir = tempdir().unwrap();
        let root = dir.path().to_str().unwrap();
        let db1 = MoreThanText::new(root).await.unwrap();
        let db2 = MoreThanText::new(root).await.unwrap();
        assert_eq!(db1.session, db2.session, "Did not read existing entry.");
    }
}
#[cfg(test)]
mod data {
    use super::*;
    use setup::MTT;

    /// Two generated ids must differ.
    #[async_std::test]
    async fn ids_are_random() {
        let mtt = MTT::new().await;
        let (id1, id2) = (mtt.db.new_id(), mtt.db.new_id());
        assert_ne!(id1, id2, "Ids should be random");
    }

    /// Adding a database returns a handle whose session is the supplied id.
    #[async_std::test]
    async fn add_database() {
        let mtt = MTT::new().await;
        let name = "fred";
        let id = "*gsdfg";
        let output = mtt.db.add("database", name, id).await;
        assert_eq!(output.session, [id], "should update session info.");
        // Disabled until `list` returns real data:
        /*
        assert_eq!(
            mtt.db.list(["database"].to_vec()).await.unwrap(),
            [name],
            "Should list the databases."
        );
        */
    }
}
#[cfg(test)]
mod cache_test {
    use super::*;
    use async_std::fs::read;
    use setup::MTT;
    use std::error::Error;

    /// Two stored entries receive different ids.
    #[async_std::test]
    async fn entry_ids_are_random() {
        let mtt = MTT::new().await;
        let data1 = CacheType::Raw("one".to_string());
        let data2 = CacheType::Raw("two".to_string());
        let id1 = mtt.db.add_entry(data1).await.unwrap();
        let id2 = mtt.db.add_entry(data2).await.unwrap();
        assert_ne!(id1, id2, "Ids should be unique.");
    }

    /// A stored entry can be read back and is persisted in the data directory.
    #[async_std::test]
    async fn store_cache() {
        let mtt = MTT::new().await;
        let data = "something";
        let expected = CacheType::Raw(data.to_string());
        let id = mtt.db.add_entry(expected.clone()).await.unwrap();
        let output = mtt.db.get_entry(&id).await.unwrap();
        assert_eq!(output.to_string(), data);
        let dfile = mtt.dir.path().join(DATA).join(&id);
        assert!(dfile.is_file(), "Cache file should exist.");
        let content = read(dfile).await.unwrap();
        assert_eq!(content, expected.to_bytes());
    }

    /// Reading a cached entry refreshes its last-used time.
    #[async_std::test]
    async fn get_entry_updates_time() {
        let mtt = MTT::new().await;
        let id = "something";
        let holder = CacheEntry {
            data: CacheType::Raw("old".to_string()),
            last_used: Instant::now() - Duration::from_secs(200),
        };
        let mut cache = mtt.db.cache.lock().await;
        cache.insert(id.to_string(), holder);
        // Release the lock before `get_entry` tries to take it.
        drop(cache);
        mtt.db.get_entry(id).await.unwrap();
        let cache = mtt.db.cache.lock().await;
        let held = cache.get(id).unwrap().elapsed();
        assert!(
            Duration::from_secs(1) > held,
            "Duration was {:?}, should have been close to 0s.",
            held
        );
    }

    /// An entry that only exists on disk is loaded and then cached.
    #[async_std::test]
    async fn retrieve_from_disk() {
        let mtt = MTT::new().await;
        let id = "someid";
        let data = CacheType::Raw("stored".to_string());
        write(mtt.dir.path().join(DATA).join(id), data.to_bytes())
            .await
            .unwrap();
        let output = mtt.db.get_entry(id).await.unwrap();
        assert_eq!(output.to_string(), data.to_string());
        let cache = mtt.db.cache.lock().await;
        assert!(cache.get(id).is_some(), "Did not store entry in the cache.");
    }

    /// Storing an entry fails with a sourced error when the data directory is gone.
    #[async_std::test]
    async fn store_bad_file() {
        let mtt = MTT::new().await;
        let msg = "could not write to file";
        mtt.create_io_error().await;
        match mtt.db.add_entry(CacheType::Raw("fail".to_string())).await {
            Ok(_) => panic!("This test should fail."),
            Err(err) => {
                assert_eq!(err.to_string(), "data write");
                assert!(err.source().is_some(), "Must include the source error.");
                let err_msg = err.source().unwrap().to_string();
                assert!(err_msg.contains(msg), "'{}' not in '{}'", msg, err_msg);
            }
        }
    }

    /// Requesting an unknown id reports that the entry was not found.
    #[async_std::test]
    async fn retrieve_bad_id() {
        let mtt = MTT::new().await;
        match mtt.db.get_entry("Not Valid").await {
            Ok(_) => panic!("Should have raised an error."),
            Err(err) => assert_eq!(err.to_string(), "cache entry not found"),
        }
    }

    /// Updating an entry replaces its content in the cache and on disk and refreshes its
    /// last-used time.
    #[async_std::test]
    async fn update_cache_entry() {
        let mtt = MTT::new().await;
        let id = "updateable";
        let holder = CacheEntry {
            data: CacheType::Raw("elder".to_string()),
            last_used: Instant::now() - Duration::from_secs(500),
        };
        let mut cache = mtt.db.cache.lock().await;
        cache.insert(id.to_string(), holder);
        // Release the lock before `update_entry` tries to take it.
        drop(cache);
        let expected = "different";
        let expect = CacheType::Raw(expected.to_string());
        mtt.db.update_entry(id, expect.clone()).await.unwrap();
        let output = mtt.db.get_entry(id).await.unwrap();
        assert_eq!(output.to_string(), expected);
        let cache = mtt.db.cache.lock().await;
        let held = cache.get(id).unwrap().elapsed();
        assert!(
            Duration::from_secs(1) > held,
            "Duration was {:?}, should have been close to 0s.",
            held
        );
        drop(cache);
        let content = read(mtt.dir.path().join(DATA).join(id)).await.unwrap();
        assert_eq!(content, expect.to_bytes());
    }

    /// Updating an unknown id reports that the entry was not found.
    #[async_std::test]
    async fn update_bad_id() {
        let mtt = MTT::new().await;
        match mtt
            .db
            .update_entry("wilma", CacheType::Raw("wrong".to_string()))
            .await
        {
            Ok(_) => panic!("Bad id should raise an error."),
            Err(err) => assert_eq!(err.to_string(), "cache entry not found"),
        }
    }

    /// Updating fails with a sourced error when the data directory is gone.
    #[async_std::test]
    async fn update_bad_file() {
        let mtt = MTT::new().await;
        let msg = "could not write to file";
        let id = mtt
            .db
            .add_entry(CacheType::Raw("fleeting".to_string()))
            .await
            .unwrap();
        mtt.create_io_error().await;
        match mtt
            .db
            .update_entry(&id, CacheType::Raw("failure".to_string()))
            .await
        {
            Ok(_) => panic!("This should produce a write failure."),
            Err(err) => {
                assert_eq!(err.to_string(), "data write");
                assert!(err.source().is_some(), "Must include the source error.");
                let err_msg = err.source().unwrap().to_string();
                assert!(err_msg.contains(msg), "'{}' not in '{}'", msg, err_msg);
            }
        }
    }

    /// A deleted entry can no longer be retrieved.
    #[async_std::test]
    async fn remove_entry() {
        let mtt = MTT::new().await;
        let id = mtt
            .db
            .add_entry(CacheType::Raw("delete".to_string()))
            .await
            .unwrap();
        mtt.db.delete_entry(&id).await.unwrap();
        assert!(
            mtt.db.get_entry(&id).await.is_err(),
            "Entry should be removed from cache."
        );
    }

    /// Deleting an unknown id fails with a sourced error.
    #[async_std::test]
    async fn remove_missing_entry() {
        let mtt = MTT::new().await;
        let msg = "could not remove file";
        match mtt.db.delete_entry("missing").await {
            Ok(_) => panic!("This should produce a delete failure."),
            Err(err) => {
                assert_eq!(err.to_string(), "data delete");
                assert!(err.source().is_some(), "Must include the source error.");
                let err_msg = err.source().unwrap().to_string();
                assert!(err_msg.contains(msg), "'{}' not in '{}'", msg, err_msg);
            }
        }
    }

    /// The background task evicts stale entries from memory but keeps their files.
    #[async_std::test]
    async fn remove_older() {
        let mtt = MTT::new().await;
        let id = mtt
            .db
            .add_entry(CacheType::Raw("removed".to_string()))
            .await
            .unwrap();
        let mut cache = mtt.db.cache.lock().await;
        let entry = cache.get_mut(&id).unwrap();
        entry.last_used = Instant::now() - Duration::from_secs(1000);
        // The eviction task needs the lock, so release it while waiting.
        drop(cache);
        sleep(Duration::from_secs(2)).await;
        let cache = mtt.db.cache.lock().await;
        assert!(cache.get(&id).is_none(), "The entry should not be in memory.");
        drop(cache);
        let filename = mtt.db.filename(&id);
        let fpath = Path::new(&filename);
        assert!(
            fpath.is_file().await,
            "The stored version should still exist."
        );
    }

    /// Recently used entries survive the background eviction pass.
    #[async_std::test]
    async fn keep_newer() {
        let mtt = MTT::new().await;
        let id = mtt
            .db
            .add_entry(CacheType::Raw("keep".to_string()))
            .await
            .unwrap();
        sleep(Duration::from_secs(2)).await;
        let cache = mtt.db.cache.lock().await;
        assert!(cache.get(&id).is_some(), "The entry should be in memory.");
    }
}
#[cfg(test)]
mod cache_entry {
    use super::*;

    /// A new entry displays its data and has just been used.
    #[test]
    fn init() {
        let text = "new entry";
        let holder = CacheEntry::new(CacheType::Raw(text.to_string()));
        assert_eq!(holder.to_string(), text);
        let held = holder.elapsed();
        assert!(
            held < Duration::from_secs(1),
            "Duration was {:?}, should have been close to 0s.",
            held
        );
    }

    /// `elapsed` reports the age of an entry created in the past.
    #[test]
    fn older() {
        let secs = 800;
        let age = Duration::from_secs(secs);
        let holder = CacheEntry {
            data: CacheType::Raw("older".to_string()),
            last_used: Instant::now() - age,
        };
        let held = holder.elapsed() - age;
        assert!(
            held < Duration::from_secs(1),
            "{:?} should be close to {}s",
            holder.elapsed(),
            secs
        );
    }

    /// `touch` resets the age of an entry.
    #[test]
    fn accessed() {
        let stale = Instant::now() - Duration::from_secs(700);
        let mut holder = CacheEntry {
            data: CacheType::Raw("older".to_string()),
            last_used: stale,
        };
        holder.touch();
        let held = holder.elapsed();
        assert!(
            held < Duration::from_secs(1),
            "Duration was {:?}, should have been close to 0s.",
            held
        );
    }

    /// `update` swaps the data and resets the age of an entry.
    #[test]
    fn updated() {
        let text = "new data";
        let stale = Instant::now() - Duration::from_secs(900);
        let mut holder = CacheEntry {
            data: CacheType::Raw("old data".to_string()),
            last_used: stale,
        };
        holder.update(CacheType::Raw(text.to_string()));
        assert_eq!(holder.to_string(), text);
        let held = holder.elapsed();
        assert!(
            held < Duration::from_secs(1),
            "Duration was {:?}, should have been close to 0s.",
            held
        );
    }
}
#[cfg(test)]
mod enum_ctype {
    use super::*;

    /// An unknown type header is reported as corruption.
    #[test]
    fn bad_file_header() {
        let data = b"jlksdfg\0ghjk".to_vec();
        match CacheType::from_bytes(data) {
            Ok(_) => panic!("This should fail."),
            Err(err) => assert_eq!(err.to_string(), "data corruption"),
        }
    }

    /// A header without the terminating NUL is reported as incomplete.
    #[test]
    fn incomplete_file() {
        let data = b"uoisfde".to_vec();
        match CacheType::from_bytes(data) {
            Ok(_) => panic!("This should fail."),
            Err(err) => assert_eq!(err.to_string(), "incomplete file"),
        }
    }

    /// No bytes at all is reported as an empty file.
    #[test]
    fn empty_file() {
        match CacheType::from_bytes(Vec::new()) {
            Ok(_) => panic!("This should fail."),
            Err(err) => assert_eq!(err.to_string(), "empty file"),
        }
    }

    #[test]
    fn get_raw_type() {
        let holder = CacheType::Raw("nothing important".to_string());
        assert_eq!(holder.entry_type(), "Raw");
    }

    /// `Raw` serializes as its type name, a NUL byte, then the text.
    #[test]
    fn get_raw_bytes() {
        let data = "addams";
        let holder = CacheType::Raw(data.to_string());
        let mut expected = holder.entry_type().into_bytes();
        expected.push(0);
        expected.extend_from_slice(data.as_bytes());
        assert_eq!(holder.to_bytes(), expected);
    }

    /// `Raw` survives a serialization round trip.
    #[test]
    fn from_raw_bytes() {
        let holder = CacheType::Raw("stored item".to_string());
        let output = CacheType::from_bytes(holder.to_bytes()).unwrap();
        assert_eq!(output.to_string(), holder.to_string());
    }

    #[test]
    fn get_dbmap_type() {
        let holder = CacheType::DBMap(Store::new());
        assert_eq!(holder.entry_type(), "DBMap");
    }

    /// A new `DBMap` serializes as just its type name and a NUL byte.
    #[test]
    fn get_new_databases_bytes() {
        let holder = CacheType::DBMap(Store::new());
        assert_eq!(holder.to_bytes(), b"DBMap\0".to_vec());
    }

    #[test]
    fn from_new_databases_bytes() {
        let output = CacheType::from_bytes(b"DBMap\0".to_vec()).unwrap();
        assert_eq!(output.entry_type(), "DBMap");
    }

    #[test]
    fn get_tablemap_type() {
        let holder = CacheType::TableMap;
        assert_eq!(holder.entry_type(), "TableMap");
    }

    /// `TableMap` serializes as just its type name and a NUL byte.
    #[test]
    fn get_new_database_bytes() {
        let holder = CacheType::TableMap;
        assert_eq!(holder.to_bytes(), b"TableMap\0".to_vec());
    }

    #[test]
    fn from_new_database_bytes() {
        let output = CacheType::from_bytes(b"TableMap\0".to_vec()).unwrap();
        assert_eq!(output.entry_type(), "TableMap");
    }
}