diff --git a/crates/zx_document_core/Cargo.toml b/crates/zx_document_core/Cargo.toml
index 48372f4..70bb2a4 100644
--- a/crates/zx_document_core/Cargo.toml
+++ b/crates/zx_document_core/Cargo.toml
@@ -6,3 +6,5 @@ edition = "2021"
 [dependencies]
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
+infer = "0.16"
+mime_guess = "2"
diff --git a/crates/zx_document_core/src/material_type.rs b/crates/zx_document_core/src/material_type.rs
index 85ca0a1..926ff15 100644
--- a/crates/zx_document_core/src/material_type.rs
+++ b/crates/zx_document_core/src/material_type.rs
@@ -1,5 +1,9 @@
+use std::path::Path;
+
 use serde::{Deserialize, Serialize};
 
+use crate::error::DocumentError;
+
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub enum MaterialType {
     Markdown,
@@ -20,3 +24,254 @@ pub enum PreviewMode {
     ExternalOpen,
     Unsupported,
 }
+
+/// Upper bound on the number of leading bytes read for content sniffing.
+const SNIFF_LIMIT: u64 = 64 * 1024;
+
+/// Leading signatures of the ZIP and OLE2 containers used by office files.
+const CONTAINER_MAGICS: [&[u8]; 2] = [
+    b"PK\x03\x04",
+    &[0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1],
+];
+
+impl MaterialType {
+    /// Returns how a document of this type should be presented.
+    pub fn preview_mode(&self) -> PreviewMode {
+        match self {
+            MaterialType::Markdown
+            | MaterialType::Text
+            | MaterialType::Image
+            | MaterialType::Epub => PreviewMode::NativeReader,
+            MaterialType::Pdf | MaterialType::Word | MaterialType::Excel => {
+                PreviewMode::PlatformPreview
+            }
+            MaterialType::PowerPoint => PreviewMode::ExternalOpen,
+            MaterialType::Unknown => PreviewMode::Unsupported,
+        }
+    }
+}
+
+/// Detects the [`MaterialType`] of the file at `file_path`.
+///
+/// Strategy: magic bytes → extension table → MIME guessed from extension.
+///
+/// The file content is consulted only when the file can be read; otherwise
+/// detection falls back to the extension, so the path need not exist. The
+/// extension is compared without regard to ASCII case.
+///
+/// # Errors
+///
+/// No error is produced at present: unreadable or unrecognised files
+/// resolve to [`MaterialType::Unknown`] at worst.
+pub fn detect_material_type(file_path: &str) -> Result<MaterialType, DocumentError> {
+    let path = Path::new(file_path);
+    if let Some(material) = sniff_content(path) {
+        return Ok(material);
+    }
+
+    let material = path
+        .extension()
+        .and_then(|ext| ext.to_str())
+        .map_or(MaterialType::Unknown, classify_extension);
+    Ok(material)
+}
+
+/// Classifies a file by the signature found in its content.
+///
+/// Only the first [`SNIFF_LIMIT`] bytes are read unless the file is a ZIP or
+/// OLE2 container that the prefix alone could not identify. Returns `None`
+/// when the file cannot be read or holds no supported type.
+fn sniff_content(path: &Path) -> Option<MaterialType> {
+    use std::io::Read;
+
+    let mut file = std::fs::File::open(path).ok()?;
+    let mut bytes = Vec::new();
+    file.by_ref().take(SNIFF_LIMIT).read_to_end(&mut bytes).ok()?;
+
+    if let Some(material) = classify_bytes(&bytes) {
+        return Some(material);
+    }
+
+    // Office containers may keep their identifying record beyond the sniffed
+    // prefix, so only for those is the remainder of the file loaded.
+    let truncated = bytes.len() as u64 >= SNIFF_LIMIT;
+    let is_container = CONTAINER_MAGICS.iter().any(|magic| bytes.starts_with(magic));
+    if !(truncated && is_container) {
+        return None;
+    }
+    file.read_to_end(&mut bytes).ok()?;
+    classify_bytes(&bytes)
+}
+
+/// Maps the MIME type inferred from `bytes` to a supported [`MaterialType`].
+fn classify_bytes(bytes: &[u8]) -> Option<MaterialType> {
+    match infer::get(bytes)?.mime_type() {
+        "application/pdf" => Some(MaterialType::Pdf),
+        "image/png" | "image/jpeg" | "image/webp" | "image/gif" => Some(MaterialType::Image),
+        "application/epub+zip" => Some(MaterialType::Epub),
+        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+        | "application/msword" => Some(MaterialType::Word),
+        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+        | "application/vnd.ms-excel" => Some(MaterialType::Excel),
+        "application/vnd.openxmlformats-officedocument.presentationml.presentation"
+        | "application/vnd.ms-powerpoint" => Some(MaterialType::PowerPoint),
+        _ => None,
+    }
+}
+
+/// Classifies a file by its extension, ignoring ASCII case.
+///
+/// Well-known extensions come from an explicit table so that text-like
+/// formats whose registered MIME type is not `text/*` are still treated as
+/// text. Anything else is classified by the MIME type guessed for it.
+fn classify_extension(ext: &str) -> MaterialType {
+    let ext = ext.to_ascii_lowercase();
+    match ext.as_str() {
+        "md" | "markdown" => MaterialType::Markdown,
+        "txt" | "text" | "html" | "htm" | "css" | "js" | "ts" | "rs" | "py" | "java" | "c"
+        | "cpp" | "h" | "hpp" | "swift" | "kt" | "xml" | "json" | "yaml" | "yml" | "toml" => {
+            MaterialType::Text
+        }
+        "pdf" => MaterialType::Pdf,
+        "png" | "jpg" | "jpeg" | "webp" | "gif" | "bmp" | "svg" => MaterialType::Image,
+        "epub" => MaterialType::Epub,
+        "doc" | "docx" => MaterialType::Word,
+        "xls" | "xlsx" => MaterialType::Excel,
+        "ppt" | "pptx" => MaterialType::PowerPoint,
+        other => {
+            let mime = mime_guess::from_ext(other).first_or_octet_stream();
+            match mime.type_() {
+                mime_guess::mime::IMAGE => MaterialType::Image,
+                mime_guess::mime::TEXT
+                    if matches!(mime.subtype().as_str(), "markdown" | "x-markdown") =>
+                {
+                    MaterialType::Markdown
+                }
+                _ => MaterialType::Unknown,
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_detect_markdown() {
+        let t = detect_material_type("test.md").unwrap();
+        assert_eq!(t, MaterialType::Markdown);
+    }
+
+    #[test]
+    fn test_detect_txt() {
+        let t = detect_material_type("notes.txt").unwrap();
+        assert_eq!(t, MaterialType::Text);
+    }
+
+    #[test]
+    fn test_detect_pdf() {
+        let t = detect_material_type("doc.pdf").unwrap();
+        assert_eq!(t, MaterialType::Pdf);
+    }
+
+    #[test]
+    fn test_detect_image() {
+        assert_eq!(
+            detect_material_type("photo.png").unwrap(),
+            MaterialType::Image
+        );
+        assert_eq!(
+            detect_material_type("photo.jpg").unwrap(),
+            MaterialType::Image
+        );
+        assert_eq!(
+            detect_material_type("photo.jpeg").unwrap(),
+            MaterialType::Image
+        );
+    }
+
+    #[test]
+    fn test_detect_epub() {
+        let t = detect_material_type("book.epub").unwrap();
+        assert_eq!(t, MaterialType::Epub);
+    }
+
+    #[test]
+    fn test_detect_office() {
+        assert_eq!(
+            detect_material_type("report.docx").unwrap(),
+            MaterialType::Word
+        );
+        assert_eq!(
+            detect_material_type("sheet.xlsx").unwrap(),
+            MaterialType::Excel
+        );
+        assert_eq!(
+            detect_material_type("deck.pptx").unwrap(),
+            MaterialType::PowerPoint
+        );
+    }
+
+    #[test]
+    fn test_detect_unknown() {
+        let t = detect_material_type("weird.xyz").unwrap();
+        assert_eq!(t, MaterialType::Unknown);
+    }
+
+    #[test]
+    fn test_detect_no_extension() {
+        let t = detect_material_type("README").unwrap();
+        assert_eq!(t, MaterialType::Unknown);
+    }
+
+    #[test]
+    fn test_detect_ignores_extension_case() {
+        assert_eq!(
+            detect_material_type("SCAN.PDF").unwrap(),
+            MaterialType::Pdf
+        );
+        assert_eq!(
+            detect_material_type("Deck.PptX").unwrap(),
+            MaterialType::PowerPoint
+        );
+    }
+
+    #[test]
+    fn test_detect_text_like_extensions() {
+        assert_eq!(
+            detect_material_type("config.json").unwrap(),
+            MaterialType::Text
+        );
+        assert_eq!(
+            detect_material_type("main.ts").unwrap(),
+            MaterialType::Text
+        );
+    }
+
+    #[test]
+    fn test_detect_prefers_content_over_extension() {
+        let path = std::env::temp_dir().join(format!("zx_sniff_{}.txt", std::process::id()));
+        std::fs::write(&path, b"%PDF-1.7\n").unwrap();
+        let detected = detect_material_type(path.to_str().unwrap());
+        std::fs::remove_file(&path).unwrap();
+        assert_eq!(detected.unwrap(), MaterialType::Pdf);
+    }
+
+    #[test]
+    fn test_preview_mode() {
+        assert_eq!(
+            MaterialType::Markdown.preview_mode(),
+            PreviewMode::NativeReader
+        );
+        assert_eq!(MaterialType::Pdf.preview_mode(), PreviewMode::PlatformPreview);
+        assert_eq!(
+            MaterialType::PowerPoint.preview_mode(),
+            PreviewMode::ExternalOpen
+        );
+        assert_eq!(
+            MaterialType::Unknown.preview_mode(),
+            PreviewMode::Unsupported
+        );
+    }
+}