From af58a35b0af5616489e0e2fc1c1df11816439a96 Mon Sep 17 00:00:00 2001
From: wangdl
Date: Sat, 30 May 2026 20:55:30 +0800
Subject: [PATCH] feat: TXT text reading - paragraph parsing, line/word stats

---
 crates/zx_document_core/src/text.rs | 147 +++++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)

diff --git a/crates/zx_document_core/src/text.rs b/crates/zx_document_core/src/text.rs
index e69de29..826b5f8 100644
--- a/crates/zx_document_core/src/text.rs
+++ b/crates/zx_document_core/src/text.rs
@@ -0,0 +1,147 @@
+use crate::blocks::DocumentBlock;
+use crate::error::DocumentError;
+use uuid::Uuid;
+
+/// Read a TXT file and return its content as paragraph blocks.
+///
+/// # Errors
+///
+/// Returns an error when `std::fs::read_to_string` fails, e.g. the file is
+/// missing, unreadable, or not valid UTF-8.
+pub fn read_text(file_path: &str) -> Result<Vec<DocumentBlock>, DocumentError> {
+    let content = std::fs::read_to_string(file_path)?;
+    Ok(parse_text_content(&content))
+}
+
+/// Parse a string as text content, splitting by blank lines into paragraphs.
+///
+/// A blank line is one that is empty or contains only whitespace. Both `\n`
+/// and `\r\n` line endings are recognised, and line breaks inside a paragraph
+/// are emitted as `\n`. Each paragraph is trimmed and gets a fresh UUID v4 id.
+pub fn parse_text_content(content: &str) -> Vec<DocumentBlock> {
+    let mut blocks = Vec::new();
+    let mut paragraph = String::new();
+
+    // The trailing empty sentinel line flushes the final paragraph.
+    for line in content.lines().chain(std::iter::once("")) {
+        if !line.trim().is_empty() {
+            if !paragraph.is_empty() {
+                paragraph.push('\n');
+            }
+            paragraph.push_str(line);
+            continue;
+        }
+
+        let text = paragraph.trim();
+        if !text.is_empty() {
+            blocks.push(DocumentBlock::Paragraph {
+                id: Uuid::new_v4().to_string(),
+                text: text.to_string(),
+            });
+        }
+        paragraph.clear();
+    }
+
+    blocks
+}
+
+/// Return line count and word count for a text string.
+///
+/// Lines are counted with `str::lines`, so a trailing newline does not add an
+/// extra line; words are runs of non-whitespace characters. Counts saturate
+/// at `u32::MAX` instead of wrapping.
+pub fn text_stats(content: &str) -> TextStats {
+    TextStats {
+        line_count: saturating_u32(content.lines().count()),
+        word_count: saturating_u32(content.split_whitespace().count()),
+    }
+}
+
+/// Convert a count to `u32`, clamping to `u32::MAX` when it does not fit.
+fn saturating_u32(count: usize) -> u32 {
+    u32::try_from(count).unwrap_or(u32::MAX)
+}
+
+/// Line and word totals produced by [`text_stats`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub struct TextStats {
+    /// Number of lines in the text.
+    pub line_count: u32,
+    /// Number of whitespace-separated words in the text.
+    pub word_count: u32,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_empty_text() {
+        let blocks = parse_text_content("");
+        assert!(blocks.is_empty());
+    }
+
+    #[test]
+    fn test_single_paragraph() {
+        let blocks = parse_text_content("Hello world.");
+        assert_eq!(blocks.len(), 1);
+        if let DocumentBlock::Paragraph { text, .. } = &blocks[0] {
+            assert_eq!(text, "Hello world.");
+        } else {
+            panic!("expected paragraph");
+        }
+    }
+
+    #[test]
+    fn test_multiple_paragraphs() {
+        let blocks = parse_text_content("First paragraph.\n\nSecond paragraph.\n\nThird paragraph.");
+        assert_eq!(blocks.len(), 3);
+    }
+
+    #[test]
+    fn test_trim_whitespace() {
+        let blocks = parse_text_content(" spaced text \n\n another ");
+        assert_eq!(blocks.len(), 2);
+        if let DocumentBlock::Paragraph { text, .. } = &blocks[0] {
+            assert_eq!(text, "spaced text");
+        } else {
+            panic!("expected paragraph");
+        }
+    }
+
+    #[test]
+    fn test_crlf_paragraphs() {
+        let blocks = parse_text_content("First line\r\nsecond line\r\n\r\nNext paragraph\r\n");
+        assert_eq!(blocks.len(), 2);
+        if let DocumentBlock::Paragraph { text, .. } = &blocks[0] {
+            assert_eq!(text, "First line\nsecond line");
+        } else {
+            panic!("expected paragraph");
+        }
+    }
+
+    #[test]
+    fn test_whitespace_only_separator() {
+        let blocks = parse_text_content("First\n \t\nSecond");
+        assert_eq!(blocks.len(), 2);
+    }
+
+    #[test]
+    fn test_stats() {
+        let content = "Hello world\nThis is a test.\n\nThird line.";
+        let stats = text_stats(content);
+        assert_eq!(stats.line_count, 4); // 3 text lines + 1 empty
+        assert_eq!(stats.word_count, 8);
+    }
+
+    #[test]
+    fn test_stats_empty() {
+        assert_eq!(text_stats(""), TextStats::default());
+    }
+
+    #[test]
+    fn test_file_not_found() {
+        let result = read_text("/nonexistent/file.txt");
+        assert!(result.is_err());
+    }
+}