morethantext-web/src/morethantext/mod.rs

539 lines
17 KiB
Rust

mod cachetype;
pub mod error;
use async_std::{
fs::{create_dir, read, remove_file, write},
path::Path,
sync::{Arc, Mutex},
task::{sleep, spawn},
};
use cachetype::CacheType;
use error::DBError;
use rand::{distributions::Alphanumeric, thread_rng, Rng};
use std::{
collections::HashMap,
fmt, str,
time::{Duration, Instant},
};
/// Name of the subdirectory, joined onto the directory passed to
/// `MoreThanText::new`, in which entry files are stored.
const DATA: &str = "data";
/// A cached value paired with the moment it was last used.
#[derive(Clone)]
struct CacheEntry {
/// The cached payload.
data: CacheType,
/// Set to "now" on creation, `touch`, and `update`; `elapsed` measures from it.
last_used: Instant,
}
impl CacheEntry {
    /// Wraps `data` in a new entry whose last-used time is the current instant.
    fn new(data: CacheType) -> Self {
        let last_used = Instant::now();
        Self { data, last_used }
    }

    /// Returns how long it has been since the entry was last used.
    fn elapsed(&self) -> Duration {
        self.last_used.elapsed()
    }

    /// Marks the entry as used right now.
    fn touch(&mut self) {
        self.last_used = Instant::now();
    }

    /// Replaces the stored data and marks the entry as used right now.
    fn update(&mut self, data: CacheType) {
        // A brand-new entry carries exactly the new data and a fresh timestamp.
        *self = Self::new(data);
    }
}
impl fmt::Display for CacheEntry {
    /// Writes the entry's data using the data's own `Display` output; the
    /// last-used timestamp is not part of the rendering.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_fmt(format_args!("{}", self.data))
    }
}
/// Handle to a file-backed store with an in-memory cache in front of it.
///
/// The cache sits behind an `Arc`, so clones of this handle share one cache.
#[derive(Clone)]
pub struct MoreThanText {
/// Cached entries keyed by entry id.
cache: Arc<Mutex<HashMap<String, CacheEntry>>>,
/// Path of the data directory, stored as a string.
dir: String,
}
impl MoreThanText {
pub async fn new(dir: &str) -> Result<Self, DBError> {
let data_dir = Path::new(dir).join(DATA);
if !data_dir.is_dir().await {
match create_dir(&data_dir).await {
Ok(_) => (),
Err(err) => {
let mut error = DBError::new("failed to create data directory");
error.add_source(err);
return Err(error);
}
}
}
let output = Self {
cache: Arc::new(Mutex::new(HashMap::new())),
dir: data_dir.to_str().unwrap().to_string(),
};
let looper = output.cache.clone();
spawn(async move {
let hold_time = Duration::from_secs(300);
loop {
sleep(Duration::from_secs(1)).await;
let mut ids: Vec<String> = Vec::new();
let mut cache = looper.lock().await;
for (id, entry) in cache.iter() {
if entry.elapsed() > hold_time {
ids.push(id.to_string());
}
}
for id in ids.iter() {
cache.remove(id);
}
}
});
Ok(output)
}
fn filename(&self, id: &str) -> String {
let filename = Path::new(&self.dir).join(&id);
filename.into_os_string().into_string().unwrap()
}
async fn add_entry(&self, entry: CacheType) -> Result<String, DBError> {
let mut id: String = "".to_string();
let mut dup = true;
while dup {
id = thread_rng().sample_iter(&Alphanumeric).take(32).collect();
dup = Path::new(&self.dir).join(&id).as_path().exists().await;
}
match write(Path::new(&self.filename(&id)), entry.to_bytes()).await {
Ok(_) => (),
Err(err) => {
let mut error = DBError::new("data write");
error.add_source(err);
return Err(error);
}
};
let mut cache = self.cache.lock().await;
let data = CacheEntry::new(entry);
cache.insert(id.clone(), data);
Ok(id)
}
async fn get_entry(&self, id: &str) -> Result<CacheEntry, DBError> {
let mut cache = self.cache.lock().await;
match cache.get_mut(id) {
Some(entry) => {
entry.touch();
Ok(entry.clone())
}
None => match read(Path::new(&self.filename(id))).await {
Ok(content) => {
let data = CacheEntry::new(CacheType::from_bytes(content).unwrap());
cache.insert(id.to_string(), data.clone());
Ok(data)
}
Err(_) => Err(DBError::new("cache entry not found")),
},
}
}
async fn update_entry(&self, id: &str, entry: CacheType) -> Result<(), DBError> {
match self.get_entry(id).await {
Ok(_) => (),
Err(err) => return Err(err),
}
match write(Path::new(&self.filename(id)), entry.to_bytes()).await {
Ok(_) => (),
Err(err) => {
let mut error = DBError::new("data write");
error.add_source(err);
return Err(error);
}
}
let mut cache = self.cache.lock().await;
let data = CacheEntry::new(entry);
cache.insert(id.to_string(), data);
Ok(())
}
async fn delete_entry(&self, id: &str) -> Result<(), DBError> {
let mut cache = self.cache.lock().await;
cache.remove(id);
match remove_file(Path::new(&self.filename(id))).await {
Ok(_) => Ok(()),
Err(err) => {
let mut error = DBError::new("data delete");
error.add_source(err);
Err(error)
}
}
}
}
#[cfg(test)]
mod setup {
    use super::*;
    use async_std::fs::remove_dir_all;
    use tempfile::{tempdir, TempDir};

    /// Test fixture: a `MoreThanText` instance together with the temporary
    /// directory it was opened on.
    pub struct MTT {
        pub db: MoreThanText,
        pub dir: TempDir,
    }

    impl MTT {
        /// Creates a fresh temporary directory and opens a database on it.
        pub async fn new() -> Self {
            let dir = tempdir().unwrap();
            let root = dir.path().to_str().unwrap();
            let db = MoreThanText::new(root).await.unwrap();
            Self { db, dir }
        }

        /// Removes the data directory so that later file operations fail.
        pub async fn create_io_error(&self) {
            let data_dir = self.dir.path().join(DATA);
            remove_dir_all(data_dir).await.unwrap();
        }
    }
}
#[cfg(test)]
mod init {
    use super::*;
    use std::error::Error;
    use tempfile::tempdir;

    /// Opening a database on an empty directory creates the data directory.
    #[async_std::test]
    async fn create_data_dir() {
        let dir = tempdir().unwrap();
        let root = dir.path().to_str().unwrap();
        MoreThanText::new(root).await.unwrap();
        assert!(
            dir.path().join(DATA).is_dir(),
            "Did not create the data directory."
        );
        dir.close().unwrap();
    }

    /// Opening a database succeeds when the data directory already exists.
    #[async_std::test]
    async fn existing_data_dir() {
        let dir = tempdir().unwrap();
        create_dir(dir.path().join(DATA)).await.unwrap();
        let root = dir.path().to_str().unwrap();
        MoreThanText::new(root).await.unwrap();
        dir.close().unwrap();
    }

    /// A data directory that cannot be created is reported with its source error.
    #[async_std::test]
    async fn bad_data_dir() {
        let msg = "could not create directory";
        let err = match MoreThanText::new("kljsdgfhslkfrh").await {
            Ok(_) => panic!("This test should fail to create a data directory"),
            Err(err) => err,
        };
        assert_eq!(err.to_string(), "failed to create data directory");
        let source = err.source().expect("Must include the source error.");
        let err_msg = source.to_string();
        assert!(err_msg.contains(msg), "'{}' not in '{}'", msg, err_msg);
    }
}
#[cfg(test)]
mod cache {
    use super::*;
    use async_std::fs::read;
    use setup::MTT;
    use std::error::Error;

    /// Asserts that `err` displays as `expected` and wraps a source error whose
    /// text contains `fragment`.
    fn assert_io_failure(err: &DBError, expected: &str, fragment: &str) {
        assert_eq!(err.to_string(), expected);
        let source = err.source().expect("Must include the source error.");
        let err_msg = source.to_string();
        assert!(
            err_msg.contains(fragment),
            "'{}' not in '{}'",
            fragment,
            err_msg
        );
    }

    /// Asserts that `held` is under one second, i.e. the entry was just used.
    fn assert_recent(held: Duration) {
        assert!(
            Duration::from_secs(1) > held,
            "Duration was {:?}, should have been close to 0s.",
            held
        );
    }

    /// Two added entries receive different ids.
    #[async_std::test]
    async fn entry_ids_are_random() {
        let mtt = MTT::new().await;
        let data1 = CacheType::Raw("one".to_string());
        let data2 = CacheType::Raw("two".to_string());
        let id1 = mtt.db.add_entry(data1).await.unwrap();
        let id2 = mtt.db.add_entry(data2).await.unwrap();
        assert_ne!(id1, id2, "Ids should be unique.");
    }

    /// An added entry can be read back and is also written to its file.
    #[async_std::test]
    async fn store_cache() {
        let mtt = MTT::new().await;
        let data = "something";
        let expected = CacheType::Raw(data.to_string());
        let id = mtt.db.add_entry(expected.clone()).await.unwrap();
        let output = mtt.db.get_entry(&id).await.unwrap();
        assert_eq!(output.to_string(), data);
        let dfile = mtt.dir.path().join(DATA).join(&id);
        assert!(dfile.is_file(), "Cache file should exist.");
        let content = read(dfile).await.unwrap();
        assert_eq!(content, expected.to_bytes());
    }

    /// Reading a cached entry resets its last-used time.
    #[async_std::test]
    async fn get_entry_updates_time() {
        let mtt = MTT::new().await;
        let id = "something";
        let holder = CacheEntry {
            data: CacheType::Raw("old".to_string()),
            last_used: Instant::now() - Duration::from_secs(200),
        };
        // The guard is a temporary, so the lock is released before `get_entry`.
        mtt.db.cache.lock().await.insert(id.to_string(), holder);
        mtt.db.get_entry(id).await.unwrap();
        let cache = mtt.db.cache.lock().await;
        let held = cache.get(id).unwrap().elapsed();
        assert_recent(held);
    }

    /// An entry that exists only on disk is loaded and then cached.
    #[async_std::test]
    async fn retrieve_from_disk() {
        let mtt = MTT::new().await;
        let id = "someid";
        let data = CacheType::Raw("stored".to_string());
        write(mtt.dir.path().join(DATA).join(id), data.to_bytes())
            .await
            .unwrap();
        let output = mtt.db.get_entry(id).await.unwrap();
        assert_eq!(output.to_string(), data.to_string());
        let cache = mtt.db.cache.lock().await;
        assert!(
            cache.get(id).is_some(),
            "Did not store entry in the cache."
        );
    }

    /// Adding an entry reports a write error when the data directory is gone.
    #[async_std::test]
    async fn store_bad_file() {
        let mtt = MTT::new().await;
        mtt.create_io_error().await;
        match mtt.db.add_entry(CacheType::Raw("fail".to_string())).await {
            Ok(_) => panic!("This test should fail."),
            Err(err) => assert_io_failure(&err, "data write", "could not write to file"),
        }
    }

    /// Reading an unknown id is an error.
    #[async_std::test]
    async fn retrieve_bad_id() {
        let mtt = MTT::new().await;
        match mtt.db.get_entry("Not Valid").await {
            Ok(_) => panic!("Should have raised an error."),
            Err(err) => assert_eq!(err.to_string(), "cache entry not found"),
        }
    }

    /// Updating replaces the data, refreshes the timestamp and rewrites the file.
    #[async_std::test]
    async fn update_cache_entry() {
        let mtt = MTT::new().await;
        let id = "updateable";
        let holder = CacheEntry {
            data: CacheType::Raw("elder".to_string()),
            last_used: Instant::now() - Duration::from_secs(500),
        };
        mtt.db.cache.lock().await.insert(id.to_string(), holder);
        let expected = "different";
        let expect = CacheType::Raw(expected.to_string());
        mtt.db.update_entry(id, expect.clone()).await.unwrap();
        let output = mtt.db.get_entry(id).await.unwrap();
        assert_eq!(output.to_string(), expected);
        {
            let cache = mtt.db.cache.lock().await;
            let held = cache.get(id).unwrap().elapsed();
            assert_recent(held);
        }
        let content = read(mtt.dir.path().join(DATA).join(id)).await.unwrap();
        assert_eq!(content, expect.to_bytes());
    }

    /// Updating an unknown id is an error.
    #[async_std::test]
    async fn update_bad_id() {
        let mtt = MTT::new().await;
        match mtt
            .db
            .update_entry("wilma", CacheType::Raw("wrong".to_string()))
            .await
        {
            Ok(_) => panic!("Bad id should raise an error."),
            Err(err) => assert_eq!(err.to_string(), "cache entry not found"),
        }
    }

    /// Updating reports a write error when the data directory is gone.
    #[async_std::test]
    async fn update_bad_file() {
        let mtt = MTT::new().await;
        let id = mtt
            .db
            .add_entry(CacheType::Raw("fleeting".to_string()))
            .await
            .unwrap();
        mtt.create_io_error().await;
        match mtt
            .db
            .update_entry(&id, CacheType::Raw("failure".to_string()))
            .await
        {
            Ok(_) => panic!("This should produce a write failure."),
            Err(err) => assert_io_failure(&err, "data write", "could not write to file"),
        }
    }

    /// A deleted entry can no longer be read.
    #[async_std::test]
    async fn remove_entry() {
        let mtt = MTT::new().await;
        let id = mtt
            .db
            .add_entry(CacheType::Raw("delete".to_string()))
            .await
            .unwrap();
        mtt.db.delete_entry(&id).await.unwrap();
        assert!(
            mtt.db.get_entry(&id).await.is_err(),
            "Entry should be removed from cache."
        );
    }

    /// Deleting an id that has no file reports a delete error.
    #[async_std::test]
    async fn remove_missing_entry() {
        let mtt = MTT::new().await;
        match mtt.db.delete_entry("missing").await {
            Ok(_) => panic!("This should produce a delete failure."),
            Err(err) => assert_io_failure(&err, "data delete", "could not remove file"),
        }
    }

    /// Entries idle past the hold time leave memory while their file stays.
    #[async_std::test]
    async fn remove_older() {
        let mtt = MTT::new().await;
        let id = mtt
            .db
            .add_entry(CacheType::Raw("removed".to_string()))
            .await
            .unwrap();
        {
            let mut cache = mtt.db.cache.lock().await;
            let entry = cache.get_mut(&id).unwrap();
            entry.last_used = Instant::now() - Duration::from_secs(1000);
        }
        // Give the once-per-second eviction task time to run.
        sleep(Duration::from_secs(2)).await;
        {
            let cache = mtt.db.cache.lock().await;
            assert!(
                cache.get(&id).is_none(),
                "The entry should not be in memory."
            );
        }
        let filename = mtt.db.filename(&id);
        let fpath = Path::new(&filename);
        assert!(
            fpath.is_file().await,
            "The stored version should still exist."
        );
    }

    /// Recently used entries survive the eviction task.
    #[async_std::test]
    async fn keep_newer() {
        let mtt = MTT::new().await;
        let id = mtt
            .db
            .add_entry(CacheType::Raw("keep".to_string()))
            .await
            .unwrap();
        sleep(Duration::from_secs(2)).await;
        let cache = mtt.db.cache.lock().await;
        assert!(cache.get(&id).is_some(), "The entry should be in memory.");
    }
}
#[cfg(test)]
mod cache_entry {
    use super::*;

    /// Builds an entry holding `text` whose last use lies `secs` seconds in the past.
    fn aged(text: &str, secs: u64) -> CacheEntry {
        CacheEntry {
            data: CacheType::Raw(text.to_string()),
            last_used: Instant::now() - Duration::from_secs(secs),
        }
    }

    /// Asserts that `held` is under one second.
    fn assert_fresh(held: Duration) {
        assert!(
            Duration::from_secs(1) > held,
            "Duration was {:?}, should have been close to 0s.",
            held
        );
    }

    /// A new entry displays its data and starts with a fresh timestamp.
    #[test]
    fn init() {
        let text = "new entry";
        let holder = CacheEntry::new(CacheType::Raw(text.to_string()));
        assert_eq!(holder.to_string(), text);
        assert_fresh(holder.elapsed());
    }

    /// `elapsed` reflects how far back `last_used` lies.
    #[test]
    fn older() {
        let secs = 800;
        let holder = aged("older", secs);
        let held = holder.elapsed() - Duration::from_secs(secs);
        assert!(
            Duration::from_secs(1) > held,
            "{:?} should be close to {}s",
            holder.elapsed(),
            secs
        );
    }

    /// `touch` resets the timestamp.
    #[test]
    fn accessed() {
        let mut holder = aged("older", 700);
        holder.touch();
        assert_fresh(holder.elapsed());
    }

    /// `update` swaps the data and resets the timestamp.
    #[test]
    fn updated() {
        let text = "new data";
        let mut holder = aged("old data", 900);
        holder.update(CacheType::Raw(text.to_string()));
        assert_eq!(holder.to_string(), text);
        assert_fresh(holder.elapsed());
    }
}