morethantext-web/src/morethantext/mod.rs

380 lines
11 KiB
Rust

pub mod error;
use async_std::{
fs::{create_dir, read, write},
path::Path,
sync::{Arc, Mutex},
};
use error::DBError;
use rand::{distributions::Alphanumeric, thread_rng, Rng};
use std::{collections::HashMap, fmt, str, time::Instant};
/// Name of the subdirectory that holds the entry files; `MoreThanText::new`
/// joins it onto the directory it is given.
const DATA: &str = "data";
/// Payload variants that can be held in the cache and written to disk.
#[derive(Clone)]
enum CacheType {
    /// Plain text.
    Raw(String),
}

impl CacheType {
    /// Returns the variant name; `to_bytes` uses it as the leading type tag.
    fn entry_type(&self) -> String {
        match self {
            CacheType::Raw(_) => "Raw".to_string(),
        }
    }

    /// Serializes the entry as `<type tag> 0x00 <payload bytes>`.
    fn to_bytes(&self) -> Vec<u8> {
        let mut output = self.entry_type().into_bytes();
        output.push(0);
        match self {
            CacheType::Raw(s) => output.extend_from_slice(s.as_bytes()),
        }
        output
    }

    /// Rebuilds an entry from the layout produced by `to_bytes`.
    ///
    /// Everything after the first NUL byte is the payload; the type tag in
    /// front of it is skipped because `Raw` is the only variant.
    ///
    /// This never panics, since the bytes may come from a damaged file:
    /// * input without a NUL separator (including empty input) yields an
    ///   empty payload;
    /// * payload bytes that are not valid UTF-8 are replaced with U+FFFD.
    fn from_bytes(data: Vec<u8>) -> CacheType {
        let payload: &[u8] = match data.iter().position(|&byte| byte == 0) {
            Some(separator) => &data[separator + 1..],
            None => &[],
        };
        let text = match str::from_utf8(payload) {
            Ok(valid) => valid.to_owned(),
            // Damaged payload: keep what is readable instead of panicking.
            Err(_) => String::from_utf8_lossy(payload).into_owned(),
        };
        CacheType::Raw(text)
    }
}
impl fmt::Display for CacheType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
CacheType::Raw(s) => write!(f, "{}", s),
}
}
}
/// A cached payload together with a timestamp.
#[derive(Clone)]
struct CacheEntry {
/// The cached payload.
data: CacheType,
/// Set to `Instant::now()` by `CacheEntry::new`; nothing in the visible code
/// reads or refreshes it afterwards.
last_used: Instant,
}
impl CacheEntry {
    /// Wraps `data` in a new entry stamped with the current instant.
    fn new(data: CacheType) -> Self {
        let last_used = Instant::now();
        Self { data, last_used }
    }
}
/// Displays an entry exactly as its payload displays; the timestamp is not shown.
impl fmt::Display for CacheEntry {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("{}", self.data))
    }
}
/// Handle to the data store: an in-memory map of entries backed by one file
/// per entry in a data directory.
///
/// The map sits behind an `Arc`, so clones of the handle share the same cache.
#[derive(Clone)]
pub struct MoreThanText {
/// Entries keyed by id, guarded by an async mutex.
cache: Arc<Mutex<HashMap<String, CacheEntry>>>,
/// Path of the data directory; each entry is stored in a file named by its id.
dir: String,
}
impl MoreThanText {
pub async fn new(dir: &str) -> Result<Self, DBError> {
let data_dir = Path::new(dir).join(DATA);
if !data_dir.is_dir().await {
match create_dir(&data_dir).await {
Ok(_) => (),
Err(err) => {
let mut error = DBError::new("failed to create data directory");
error.add_source(err);
return Err(error);
}
}
}
Ok(Self {
cache: Arc::new(Mutex::new(HashMap::new())),
dir: data_dir.to_str().unwrap().to_string(),
})
}
async fn add_entry(&self, entry: CacheType) -> Result<String, DBError> {
let mut id: String = "".to_string();
let mut dup = true;
while dup {
id = thread_rng().sample_iter(&Alphanumeric).take(32).collect();
dup = Path::new(&self.dir).join(&id).as_path().exists().await;
}
match write(Path::new(&self.dir).join(&id), entry.to_bytes()).await {
Ok(_) => (),
Err(err) => {
let mut error = DBError::new("data write");
error.add_source(err);
return Err(error);
}
};
let mut cache = self.cache.lock().await;
let data = CacheEntry::new(entry);
cache.insert(id.clone(), data);
Ok(id)
}
async fn get_entry(&self, id: &str) -> Result<CacheEntry, DBError> {
let cache = self.cache.lock().await;
match cache.get(id) {
Some(id) => Ok(id.clone()),
None => match read(Path::new(&self.dir).join(&id)).await {
Ok(content) => {
let data = CacheType::from_bytes(content);
Ok(CacheEntry::new(data))
}
Err(_) => Err(DBError::new("cache entry not found")),
},
}
}
async fn update_entry(&self, id: &str, entry: CacheType) -> Result<(), DBError> {
match self.get_entry(id).await {
Ok(_) => (),
Err(err) => return Err(err),
}
match write(Path::new(&self.dir).join(&id), entry.to_bytes()).await {
Ok(_) => (),
Err(err) => {
let mut error = DBError::new("data write");
error.add_source(err);
return Err(error);
}
}
let mut cache = self.cache.lock().await;
let data = CacheEntry::new(entry);
cache.insert(id.to_string(), data);
Ok(())
}
}
/// Fixture shared by the test modules in this file.
#[cfg(test)]
mod setup {
    use super::*;
    use async_std::fs::remove_dir_all;
    use tempfile::{tempdir, TempDir};

    /// A store together with the temporary directory that backs it.
    pub struct MTT {
        pub db: MoreThanText,
        pub dir: TempDir,
    }

    impl MTT {
        /// Creates a temporary directory and opens a store inside it.
        pub async fn new() -> Self {
            let dir = tempdir().unwrap();
            let root = dir.path().to_str().unwrap();
            let db = MoreThanText::new(root).await.unwrap();
            Self { db, dir }
        }

        /// Deletes the data directory so that later writes to it fail.
        pub async fn create_io_error(&self) {
            let data_dir = self.dir.path().join(DATA);
            remove_dir_all(data_dir).await.unwrap();
        }
    }
}
/// Tests for `MoreThanText::new`.
#[cfg(test)]
mod init {
    use super::*;
    use std::error::Error;
    use tempfile::tempdir;

    /// Opening a store in an empty directory creates the data subdirectory.
    #[async_std::test]
    async fn create_data_dir() {
        let root = tempdir().unwrap();
        let root_path = root.path().to_str().unwrap();
        MoreThanText::new(root_path).await.unwrap();
        assert!(
            root.path().join(DATA).is_dir(),
            "Did not create the data directory."
        );
        root.close().unwrap();
    }

    /// Opening a store succeeds when the data subdirectory already exists.
    #[async_std::test]
    async fn existing_data_dir() {
        let root = tempdir().unwrap();
        create_dir(root.path().join(DATA)).await.unwrap();
        let root_path = root.path().to_str().unwrap();
        MoreThanText::new(root_path).await.unwrap();
        root.close().unwrap();
    }

    /// A directory that cannot be created is reported with its I/O source.
    #[async_std::test]
    async fn bad_data_dir() {
        let expected_source = "could not create directory";
        let err = match MoreThanText::new("kljsdgfhslkfrh").await {
            Err(err) => err,
            Ok(_) => panic!("This test should fail to create a data directory"),
        };
        assert_eq!(err.to_string(), "failed to create data directory");
        let source_msg = err
            .source()
            .expect("Must include the source error.")
            .to_string();
        assert!(
            source_msg.contains(expected_source),
            "'{}' not in '{}'",
            expected_source,
            source_msg
        );
    }
}
/// Tests for adding, reading and updating entries.
#[cfg(test)]
mod cache {
    use super::*;
    use async_std::fs::read;
    use setup::MTT;
    use std::error::Error;

    /// Asserts that `err` is the "data write" error and that its source
    /// carries the expected I/O failure text.
    fn assert_write_failure(err: DBError) {
        let expected_source = "could not write to file";
        assert_eq!(err.to_string(), "data write");
        let source_msg = err
            .source()
            .expect("Must include the source error.")
            .to_string();
        assert!(
            source_msg.contains(expected_source),
            "'{}' not in '{}'",
            expected_source,
            source_msg
        );
    }

    /// Two added entries receive different ids.
    #[async_std::test]
    async fn entry_ids_are_random() {
        let mtt = MTT::new().await;
        let first = mtt
            .db
            .add_entry(CacheType::Raw("one".to_string()))
            .await
            .unwrap();
        let second = mtt
            .db
            .add_entry(CacheType::Raw("two".to_string()))
            .await
            .unwrap();
        assert_ne!(first, second, "Ids should be unique.")
    }

    /// An added entry can be read back and is also written to its file.
    #[async_std::test]
    async fn store_cache() {
        let mtt = MTT::new().await;
        let text = "something";
        let entry = CacheType::Raw(text.to_string());
        let id = mtt.db.add_entry(entry.clone()).await.unwrap();

        let fetched = mtt.db.get_entry(&id).await.unwrap();
        assert_eq!(fetched.to_string(), text);

        let entry_file = mtt.dir.path().join(DATA).join(&id);
        assert!(entry_file.is_file(), "Cache file should exist.");
        let on_disk = read(entry_file).await.unwrap();
        assert_eq!(on_disk, entry.to_bytes());
    }

    /// An entry that exists only as a file is still returned.
    #[async_std::test]
    async fn retrieve_from_disk() {
        let mtt = MTT::new().await;
        let id = "someid";
        let stored = CacheType::Raw("stored".to_string());
        let entry_file = mtt.dir.path().join(DATA).join(id);
        write(entry_file, stored.to_bytes()).await.unwrap();

        let fetched = mtt.db.get_entry(id).await.unwrap();
        assert_eq!(fetched.to_string(), stored.to_string());
    }

    /// Adding an entry reports a write error when the data directory is gone.
    #[async_std::test]
    async fn store_bad_file() {
        let mtt = MTT::new().await;
        mtt.create_io_error().await;
        match mtt.db.add_entry(CacheType::Raw("fail".to_string())).await {
            Err(err) => assert_write_failure(err),
            Ok(_) => panic!("This test should fail."),
        }
    }

    /// Looking up an unknown id is an error.
    #[async_std::test]
    async fn retrieve_bad_id() {
        let mtt = MTT::new().await;
        match mtt.db.get_entry("Not Valid").await {
            Err(err) => assert_eq!(err.to_string(), "cache entry not found"),
            Ok(_) => panic!("Should have raised an error."),
        }
    }

    /// Updating an entry changes both the returned value and the file.
    #[async_std::test]
    async fn update_cache_entry() {
        let mtt = MTT::new().await;
        let id = mtt
            .db
            .add_entry(CacheType::Raw("same".to_string()))
            .await
            .unwrap();
        let new_text = "different";
        let replacement = CacheType::Raw(new_text.to_string());
        mtt.db.update_entry(&id, replacement.clone()).await.unwrap();

        let fetched = mtt.db.get_entry(&id).await.unwrap();
        assert_eq!(fetched.to_string(), new_text);

        let entry_file = mtt.dir.path().join(DATA).join(id);
        let on_disk = read(entry_file).await.unwrap();
        assert_eq!(on_disk, replacement.to_bytes());
    }

    /// Updating an unknown id is an error.
    #[async_std::test]
    async fn update_bad_id() {
        let mtt = MTT::new().await;
        let outcome = mtt
            .db
            .update_entry("wilma", CacheType::Raw("wrong".to_string()))
            .await;
        match outcome {
            Err(err) => assert_eq!(err.to_string(), "cache entry not found"),
            Ok(_) => panic!("Bad id should raise an error."),
        }
    }

    /// Updating reports a write error when the data directory is gone.
    #[async_std::test]
    async fn update_bad_file() {
        let mtt = MTT::new().await;
        let id = mtt
            .db
            .add_entry(CacheType::Raw("fleeting".to_string()))
            .await
            .unwrap();
        mtt.create_io_error().await;
        let outcome = mtt
            .db
            .update_entry(&id, CacheType::Raw("failure".to_string()))
            .await;
        match outcome {
            Err(err) => assert_write_failure(err),
            Ok(_) => panic!("This should produce a write failure."),
        }
    }
}
/// Tests for the `CacheType` byte format.
#[cfg(test)]
mod cache_entry {
    use super::*;

    /// The `Raw` variant reports "Raw" as its type tag.
    #[test]
    fn raw_get_type() {
        let entry = CacheType::Raw("nothing important".to_string());
        assert_eq!(entry.entry_type(), "Raw");
    }

    /// Serialized form is the type tag, a zero byte, then the text bytes.
    #[test]
    fn raw_get_bytes() {
        let text = "addams";
        let entry = CacheType::Raw(text.to_string());
        let expected: Vec<u8> = entry
            .entry_type()
            .into_bytes()
            .into_iter()
            .chain(std::iter::once(0))
            .chain(text.bytes())
            .collect();
        assert_eq!(entry.to_bytes(), expected);
    }

    /// Serializing and parsing again gives back the same text.
    #[test]
    fn raw_from_bytes() {
        let original = CacheType::Raw("stored item".to_string());
        let restored = CacheType::from_bytes(original.to_bytes());
        assert_eq!(restored.to_string(), original.to_string());
    }
}