feat: TXT text reading - paragraph parsing, line/word stats

This commit is contained in:
wangdl 2026-05-30 20:55:30 +08:00
parent c390718c15
commit af58a35b0a

View File

@@ -0,0 +1,94 @@
use crate::blocks::DocumentBlock;
use crate::error::DocumentError;
use uuid::Uuid;
/// Read a TXT file and return its content as paragraph blocks.
pub fn read_text(file_path: &str) -> Result<Vec<DocumentBlock>, DocumentError> {
let content = std::fs::read_to_string(file_path)?;
Ok(parse_text_content(&content))
}
/// Parse a string as text content, splitting it into paragraphs at blank lines.
///
/// A line is considered blank when it is empty or contains only whitespace,
/// so both Unix (`\n`) and Windows (`\r\n`) line endings separate paragraphs
/// correctly. Consecutive blank lines act as a single separator.
///
/// Every paragraph becomes a [`DocumentBlock::Paragraph`] whose `text` is the
/// paragraph with surrounding whitespace trimmed (line breaks inside the
/// paragraph are kept exactly as they appear in `content`) and whose `id` is
/// a freshly generated `Uuid::new_v4()` rendered as a string.
///
/// Input that is empty or consists only of whitespace yields an empty vector.
pub fn parse_text_content(content: &str) -> Vec<DocumentBlock> {
    split_paragraphs(content)
        .into_iter()
        .map(|text| DocumentBlock::Paragraph {
            id: Uuid::new_v4().to_string(),
            text: text.to_string(),
        })
        .collect()
}

/// Split `content` into trimmed, non-empty paragraph slices separated by blank lines.
fn split_paragraphs(content: &str) -> Vec<&str> {
    let mut paragraphs = Vec::new();
    // Byte offset at which the paragraph currently being collected starts.
    let mut start: Option<usize> = None;
    // Byte offset of the line being examined.
    let mut offset = 0;

    // `split_inclusive` keeps each line terminator, so the running sum of
    // line lengths is always an exact byte offset into `content`.
    for line in content.split_inclusive('\n') {
        if line.trim().is_empty() {
            // A blank line closes the paragraph in progress, if there is one.
            if let Some(begin) = start.take() {
                paragraphs.push(content[begin..offset].trim());
            }
        } else if start.is_none() {
            start = Some(offset);
        }
        offset += line.len();
    }

    // The final paragraph is not necessarily followed by a blank line.
    if let Some(begin) = start {
        paragraphs.push(content[begin..].trim());
    }

    paragraphs
}
/// Count the lines and whitespace-separated words in `content`.
///
/// * `line_count` is the number of items yielded by [`str::lines`]: blank
///   lines are counted, a trailing line terminator does not add an extra
///   line, and an empty string has zero lines.
/// * `word_count` is the number of items yielded by
///   [`str::split_whitespace`].
///
/// Counts that do not fit in a `u32` saturate at `u32::MAX` instead of
/// silently wrapping around.
pub fn text_stats(content: &str) -> TextStats {
    TextStats {
        line_count: saturating_u32(content.lines().count()),
        word_count: saturating_u32(content.split_whitespace().count()),
    }
}

/// Convert a `usize` count to `u32`, clamping to `u32::MAX` when it does not fit.
fn saturating_u32(count: usize) -> u32 {
    // The trait path is spelled out so this compiles whether or not `TryFrom`
    // is in the prelude of the crate's edition.
    <u32 as std::convert::TryFrom<usize>>::try_from(count).unwrap_or(u32::MAX)
}
/// Line and word totals for a piece of text, as produced by [`text_stats`].
///
/// The type is plain data (two `u32` counters), so it is cheap to copy,
/// has an all-zero default, and supports full equality comparison.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct TextStats {
    /// Number of lines in the text.
    pub line_count: u32,
    /// Number of whitespace-separated words in the text.
    pub word_count: u32,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Return the text of a paragraph block, failing the test for any other variant.
    ///
    /// Centralising the check means no test can pass silently when a block
    /// turns out not to be a paragraph.
    fn paragraph_text(block: &DocumentBlock) -> &str {
        if let DocumentBlock::Paragraph { text, .. } = block {
            text.as_str()
        } else {
            panic!("expected paragraph")
        }
    }

    #[test]
    fn test_empty_text() {
        let blocks = parse_text_content("");
        assert!(blocks.is_empty());
    }

    #[test]
    fn test_single_paragraph() {
        let blocks = parse_text_content("Hello world.");
        assert_eq!(blocks.len(), 1);
        assert_eq!(paragraph_text(&blocks[0]), "Hello world.");
    }

    #[test]
    fn test_multiple_paragraphs() {
        let blocks =
            parse_text_content("First paragraph.\n\nSecond paragraph.\n\nThird paragraph.");
        assert_eq!(blocks.len(), 3);
        // Check content and order, not just the count.
        assert_eq!(paragraph_text(&blocks[0]), "First paragraph.");
        assert_eq!(paragraph_text(&blocks[1]), "Second paragraph.");
        assert_eq!(paragraph_text(&blocks[2]), "Third paragraph.");
    }

    #[test]
    fn test_trim_whitespace() {
        let blocks = parse_text_content(" spaced text \n\n another ");
        assert_eq!(blocks.len(), 2);
        assert_eq!(paragraph_text(&blocks[0]), "spaced text");
        assert_eq!(paragraph_text(&blocks[1]), "another");
    }

    #[test]
    fn test_stats() {
        let content = "Hello world\nThis is a test.\n\nThird line.";
        let stats = text_stats(content);
        assert_eq!(stats.line_count, 4); // 3 text lines + 1 empty
        assert_eq!(stats.word_count, 8);
    }

    #[test]
    fn test_file_not_found() {
        let result = read_text("/nonexistent/file.txt");
        assert!(result.is_err());
    }
}