morethantext-web/src/morethantext/mod.rs

509 lines
15 KiB
Rust
Raw Normal View History

2022-08-06 12:03:47 -04:00
pub mod error;
2022-12-05 23:23:39 -05:00
use async_std::{
2022-12-19 15:21:07 -05:00
fs::{create_dir, read, write},
2022-12-05 23:23:39 -05:00
path::Path,
sync::{Arc, Mutex},
};
2022-09-27 07:31:59 -04:00
use error::DBError;
2022-12-04 11:09:56 -05:00
use rand::{distributions::Alphanumeric, thread_rng, Rng};
2022-12-24 02:03:20 -05:00
use std::{
collections::HashMap,
fmt, str,
time::{Duration, Instant},
};
2022-12-02 10:34:45 -05:00
const DATA: &str = "data";
2022-08-06 12:03:47 -04:00
2022-12-05 23:21:13 -05:00
/// Kinds of payload that can be held in the cache and written to disk.
#[derive(Clone)]
enum CacheType {
    /// Plain text, stored exactly as supplied.
    Raw(String),
}
2022-12-21 10:27:16 -05:00
impl CacheType {
2022-12-10 09:26:06 -05:00
fn entry_type(&self) -> String {
match self {
2022-12-21 10:27:16 -05:00
CacheType::Raw(_) => "Raw".to_string(),
2022-12-10 09:26:06 -05:00
}
}
2022-12-11 09:34:07 -05:00
fn to_bytes(&self) -> Vec<u8> {
let mut output = self.entry_type().into_bytes();
output.push(0);
match self {
2022-12-21 10:27:16 -05:00
CacheType::Raw(s) => output.append(&mut s.as_bytes().to_vec()),
2022-12-11 09:34:07 -05:00
}
return output;
}
2022-12-19 15:21:07 -05:00
2022-12-21 10:27:16 -05:00
fn from_bytes(data: Vec<u8>) -> CacheType {
2022-12-19 15:21:07 -05:00
let mut data_iter = data.iter();
let mut holder: u8 = *data_iter.next().unwrap();
while holder != 0 {
holder = *data_iter.next().unwrap();
}
let mut output: Vec<u8> = Vec::new();
for letter in data_iter {
output.push(letter.clone());
}
2022-12-21 10:27:16 -05:00
CacheType::Raw(str::from_utf8(&output).unwrap().to_string())
2022-12-19 15:21:07 -05:00
}
2022-12-10 09:26:06 -05:00
}
2022-12-21 10:27:16 -05:00
impl fmt::Display for CacheType {
2022-12-05 23:21:13 -05:00
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
2022-12-21 10:27:16 -05:00
CacheType::Raw(s) => write!(f, "{}", s),
}
}
}
/// A cached value together with the moment it was last used.
#[derive(Clone)]
struct CacheEntry {
    /// The cached payload.
    data: CacheType,
    /// When the entry was created, touched or updated most recently.
    last_used: Instant,
}
impl CacheEntry {
fn new(data: CacheType) -> Self {
Self {
data: data,
last_used: Instant::now(),
2022-12-05 23:21:13 -05:00
}
}
2022-12-24 02:03:20 -05:00
fn elapsed(&self) -> Duration {
self.last_used.elapsed()
}
fn touch(&mut self) {
self.last_used = Instant::now();
}
fn update(&mut self, data: CacheType) {
self.data = data;
self.touch();
}
2022-12-05 23:21:13 -05:00
}
2022-12-21 10:27:16 -05:00
/// Displays the entry exactly as its payload is displayed.
impl fmt::Display for CacheEntry {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.data, f)
    }
}
2022-07-18 17:24:45 -04:00
#[derive(Clone)]
2022-12-05 23:21:13 -05:00
pub struct MoreThanText {
cache: Arc<Mutex<HashMap<String, CacheEntry>>>,
2022-12-10 09:26:06 -05:00
dir: String,
2022-12-05 23:21:13 -05:00
}
2022-07-18 17:24:45 -04:00
impl MoreThanText {
2022-12-02 10:34:45 -05:00
pub async fn new(dir: &str) -> Result<Self, DBError> {
let data_dir = Path::new(dir).join(DATA);
if !data_dir.is_dir().await {
2022-12-10 09:26:06 -05:00
match create_dir(&data_dir).await {
2022-12-02 10:34:45 -05:00
Ok(_) => (),
Err(err) => {
2022-12-03 08:26:21 -05:00
let mut error = DBError::new("failed to create data directory");
2022-12-02 10:34:45 -05:00
error.add_source(err);
return Err(error);
}
2022-08-06 12:03:47 -04:00
}
}
2022-12-05 23:21:13 -05:00
Ok(Self {
cache: Arc::new(Mutex::new(HashMap::new())),
2022-12-10 09:26:06 -05:00
dir: data_dir.to_str().unwrap().to_string(),
2022-12-05 23:21:13 -05:00
})
2022-08-07 09:29:08 -04:00
}
2022-12-03 09:15:58 -05:00
2022-12-24 11:30:45 -05:00
fn filename(&self, id: &str) -> String {
let filename = Path::new(&self.dir).join(&id);
filename.into_os_string().into_string().unwrap()
}
2022-12-21 10:27:16 -05:00
async fn add_entry(&self, entry: CacheType) -> Result<String, DBError> {
2022-12-16 08:22:36 -05:00
let mut id: String = "".to_string();
let mut dup = true;
while dup {
id = thread_rng().sample_iter(&Alphanumeric).take(32).collect();
dup = Path::new(&self.dir).join(&id).as_path().exists().await;
}
2022-12-24 11:30:45 -05:00
match write(Path::new(&self.filename(&id)), entry.to_bytes()).await {
2022-12-18 11:12:58 -05:00
Ok(_) => (),
Err(err) => {
let mut error = DBError::new("data write");
error.add_source(err);
return Err(error);
}
};
2022-12-05 23:21:13 -05:00
let mut cache = self.cache.lock().await;
2022-12-21 10:27:16 -05:00
let data = CacheEntry::new(entry);
cache.insert(id.clone(), data);
2022-12-18 11:12:58 -05:00
Ok(id)
2022-12-03 09:15:58 -05:00
}
2022-12-05 23:21:13 -05:00
2022-12-06 11:42:11 -05:00
async fn get_entry(&self, id: &str) -> Result<CacheEntry, DBError> {
2022-12-24 10:28:49 -05:00
let mut cache = self.cache.lock().await;
match cache.get_mut(id) {
Some(entry) => {
entry.touch();
Ok(entry.clone())
}
2022-12-24 11:30:45 -05:00
None => match read(Path::new(&self.filename(id))).await {
2022-12-21 10:27:16 -05:00
Ok(content) => {
2022-12-24 10:28:49 -05:00
let data = CacheEntry::new(CacheType::from_bytes(content));
cache.insert(id.to_string(), data.clone());
Ok(data)
2022-12-21 10:27:16 -05:00
}
2022-12-19 15:21:07 -05:00
Err(_) => Err(DBError::new("cache entry not found")),
},
2022-12-06 11:42:11 -05:00
}
2022-12-05 23:21:13 -05:00
}
2022-12-08 10:58:29 -05:00
2022-12-21 10:27:16 -05:00
async fn update_entry(&self, id: &str, entry: CacheType) -> Result<(), DBError> {
2022-12-08 10:58:29 -05:00
match self.get_entry(id).await {
Ok(_) => (),
Err(err) => return Err(err),
}
2022-12-24 11:30:45 -05:00
match write(Path::new(&self.filename(id)), entry.to_bytes()).await {
2022-12-19 10:33:04 -05:00
Ok(_) => (),
Err(err) => {
let mut error = DBError::new("data write");
error.add_source(err);
return Err(error);
}
}
2022-12-08 10:58:29 -05:00
let mut cache = self.cache.lock().await;
2022-12-21 10:27:16 -05:00
let data = CacheEntry::new(entry);
cache.insert(id.to_string(), data);
2022-12-08 10:58:29 -05:00
Ok(())
}
}
2022-12-03 10:22:54 -05:00
#[cfg(test)]
mod setup {
    use super::*;
    use async_std::fs::remove_dir_all;
    use tempfile::{tempdir, TempDir};

    /// Test fixture: a database plus the temporary directory backing it.
    pub struct MTT {
        pub db: MoreThanText,
        pub dir: TempDir,
    }

    impl MTT {
        /// Creates a database inside a fresh temporary directory.
        pub async fn new() -> Self {
            let dir = tempdir().unwrap();
            let root = dir.path().to_str().unwrap();
            let db = MoreThanText::new(root).await.unwrap();
            Self { db, dir }
        }

        /// Removes the data directory so that later writes into it fail.
        pub async fn create_io_error(&self) {
            let data_dir = self.dir.path().join(DATA);
            remove_dir_all(data_dir).await.unwrap();
        }
    }
}
#[cfg(test)]
mod init {
    use super::*;
    use std::error::Error;
    use tempfile::tempdir;

    /// Opening a store in an empty directory creates the data directory.
    #[async_std::test]
    async fn create_data_dir() {
        let dir = tempdir().unwrap();
        let root = dir.path().to_str().unwrap();
        MoreThanText::new(root).await.unwrap();
        assert!(
            dir.path().join(DATA).is_dir(),
            "Did not create the data directory."
        );
        dir.close().unwrap();
    }

    /// Opening a store succeeds when the data directory already exists.
    #[async_std::test]
    async fn existing_data_dir() {
        let dir = tempdir().unwrap();
        create_dir(dir.path().join(DATA)).await.unwrap();
        let root = dir.path().to_str().unwrap();
        MoreThanText::new(root).await.unwrap();
        dir.close().unwrap();
    }

    /// A data directory that cannot be created is reported with its source.
    #[async_std::test]
    async fn bad_data_dir() {
        let msg = "could not create directory";
        let err = match MoreThanText::new("kljsdgfhslkfrh").await {
            Ok(_) => panic!("This test should fail to create a data directory"),
            Err(err) => err,
        };
        assert_eq!(err.to_string(), "failed to create data directory");
        let source = err.source().expect("Must include the source error.");
        let err_msg = source.to_string();
        assert!(err_msg.contains(msg), "'{}' not in '{}'", msg, err_msg);
    }
}
2022-12-03 09:15:58 -05:00
#[cfg(test)]
mod cache {
    use super::*;
    use async_std::fs::read;
    use setup::MTT;
    use std::error::Error;

    /// Asserts that `entry` was used less than one second ago.
    fn assert_recently_used(entry: &CacheEntry) {
        let held = entry.elapsed();
        assert!(
            Duration::from_secs(1) > held,
            "Duration was {:?}, should have been close to 0s.",
            held
        );
    }

    /// Asserts that `err` is a "data write" error whose source reports the
    /// failed file write.
    fn assert_write_failure(err: &DBError) {
        let msg = "could not write to file";
        assert_eq!(err.to_string(), "data write");
        assert!(err.source().is_some(), "Must include the source error.");
        let err_msg = err.source().unwrap().to_string();
        assert!(err_msg.contains(msg), "'{}' not in '{}'", msg, err_msg);
    }

    #[async_std::test]
    async fn entry_ids_are_random() {
        let mtt = MTT::new().await;
        let data1 = CacheType::Raw("one".to_string());
        let data2 = CacheType::Raw("two".to_string());
        let id1 = mtt.db.add_entry(data1).await.unwrap();
        let id2 = mtt.db.add_entry(data2).await.unwrap();
        assert_ne!(id1, id2, "Ids should be unique.")
    }

    #[async_std::test]
    async fn store_cache() {
        let mtt = MTT::new().await;
        let data = "something";
        let expected = CacheType::Raw(data.to_string());
        let id = mtt.db.add_entry(expected.clone()).await.unwrap();
        let output = mtt.db.get_entry(&id).await.unwrap();
        assert_eq!(output.to_string(), data);
        let dfile = mtt.dir.path().join(DATA).join(&id);
        assert!(dfile.is_file(), "Cache file should exist.");
        let content = read(dfile).await.unwrap();
        assert_eq!(content, expected.to_bytes());
    }

    #[async_std::test]
    async fn get_entry_updates_time() {
        let mtt = MTT::new().await;
        let id = "something";
        let holder = CacheEntry {
            data: CacheType::Raw("old".to_string()),
            last_used: Instant::now() - Duration::from_secs(200),
        };
        // The guard is a temporary, so the lock is released before `get_entry`.
        mtt.db.cache.lock().await.insert(id.to_string(), holder);
        mtt.db.get_entry(id).await.unwrap();
        let cache = mtt.db.cache.lock().await;
        assert_recently_used(cache.get(id).unwrap());
    }

    #[async_std::test]
    async fn retrieve_from_disk() {
        let mtt = MTT::new().await;
        let id = "someid";
        let data = CacheType::Raw("stored".to_string());
        write(mtt.dir.path().join(DATA).join(id), data.to_bytes())
            .await
            .unwrap();
        let output = mtt.db.get_entry(id).await.unwrap();
        assert_eq!(output.to_string(), data.to_string());
        let cache = mtt.db.cache.lock().await;
        assert!(
            cache.get(id).is_some(),
            "Did not store entry in the cache."
        );
    }

    #[async_std::test]
    async fn store_bad_file() {
        let mtt = MTT::new().await;
        mtt.create_io_error().await;
        match mtt.db.add_entry(CacheType::Raw("fail".to_string())).await {
            Ok(_) => panic!("This test should fail."),
            Err(err) => assert_write_failure(&err),
        }
    }

    #[async_std::test]
    async fn retrieve_bad_id() {
        let mtt = MTT::new().await;
        match mtt.db.get_entry("Not Valid").await {
            Ok(_) => panic!("Should have raised an error."),
            Err(err) => assert_eq!(err.to_string(), "cache entry not found"),
        }
    }

    #[async_std::test]
    async fn update_cache_entry() {
        let mtt = MTT::new().await;
        let id = "updateable";
        let holder = CacheEntry {
            data: CacheType::Raw("elder".to_string()),
            last_used: Instant::now() - Duration::from_secs(500),
        };
        // The guard is a temporary, so the lock is released before the update.
        mtt.db.cache.lock().await.insert(id.to_string(), holder);
        let expected = "different";
        let expect = CacheType::Raw(expected.to_string());
        mtt.db.update_entry(id, expect.clone()).await.unwrap();
        let output = mtt.db.get_entry(id).await.unwrap();
        assert_eq!(output.to_string(), expected);
        {
            let cache = mtt.db.cache.lock().await;
            assert_recently_used(cache.get(id).unwrap());
        }
        let content = read(mtt.dir.path().join(DATA).join(id)).await.unwrap();
        assert_eq!(content, expect.to_bytes());
    }

    #[async_std::test]
    async fn update_bad_id() {
        let mtt = MTT::new().await;
        match mtt
            .db
            .update_entry("wilma", CacheType::Raw("wrong".to_string()))
            .await
        {
            Ok(_) => panic!("Bad id should raise an error."),
            Err(err) => assert_eq!(err.to_string(), "cache entry not found"),
        }
    }

    #[async_std::test]
    async fn update_bad_file() {
        let mtt = MTT::new().await;
        let id = mtt
            .db
            .add_entry(CacheType::Raw("fleeting".to_string()))
            .await
            .unwrap();
        mtt.create_io_error().await;
        match mtt
            .db
            .update_entry(&id, CacheType::Raw("failure".to_string()))
            .await
        {
            Ok(_) => panic!("This should produce a write failure."),
            Err(err) => assert_write_failure(&err),
        }
    }
}
2022-12-10 09:26:06 -05:00
#[cfg(test)]
mod cache_type {
    use super::*;

    /// A raw entry reports "Raw" as its type name.
    #[test]
    fn raw_get_type() {
        let holder = CacheType::Raw("nothing important".to_string());
        assert_eq!(holder.entry_type(), "Raw");
    }

    /// Serialized form is the type name, a NUL byte, then the text.
    #[test]
    fn raw_get_bytes() {
        let data = "addams";
        let holder = CacheType::Raw(data.to_string());
        let mut expected = holder.entry_type().into_bytes();
        expected.push(0);
        expected.extend_from_slice(data.as_bytes());
        assert_eq!(holder.to_bytes(), expected);
    }

    /// Deserializing the serialized form yields the same text.
    #[test]
    fn raw_from_bytes() {
        let holder = CacheType::Raw("stored item".to_string());
        let output = CacheType::from_bytes(holder.to_bytes());
        assert_eq!(output.to_string(), holder.to_string());
    }
}
2022-12-24 02:03:20 -05:00
#[cfg(test)]
mod cache_entry {
    use super::*;

    /// Builds an entry holding `text` that was last used `secs` seconds ago.
    fn aged(text: &str, secs: u64) -> CacheEntry {
        CacheEntry {
            data: CacheType::Raw(text.to_string()),
            last_used: Instant::now() - Duration::from_secs(secs),
        }
    }

    /// Asserts that `held` is shorter than one second.
    fn assert_close_to_zero(held: Duration) {
        assert!(
            Duration::from_secs(1) > held,
            "Duration was {:?}, should have been close to 0s.",
            held
        );
    }

    #[test]
    fn init() {
        let text = "new entry";
        let holder = CacheEntry::new(CacheType::Raw(text.to_string()));
        assert_eq!(holder.to_string(), text);
        assert_close_to_zero(holder.elapsed());
    }

    #[test]
    fn older() {
        let secs = 800;
        let holder = aged("older", secs);
        let held = holder.elapsed() - Duration::from_secs(secs);
        assert!(
            Duration::from_secs(1) > held,
            "{:?} should be close to {}s",
            holder.elapsed(),
            secs
        );
    }

    #[test]
    fn accessed() {
        let mut holder = aged("older", 700);
        holder.touch();
        assert_close_to_zero(holder.elapsed());
    }

    #[test]
    fn updated() {
        let text = "new data";
        let mut holder = aged("old data", 900);
        holder.update(CacheType::Raw(text.to_string()));
        assert_eq!(holder.to_string(), text);
        assert_close_to_zero(holder.elapsed());
    }
}