feat: TXT text reading - paragraph parsing, line/word stats
This commit is contained in:
parent
c390718c15
commit
af58a35b0a
@ -0,0 +1,94 @@
|
||||
use crate::blocks::DocumentBlock;
|
||||
use crate::error::DocumentError;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Read a TXT file and return its content as paragraph blocks.
|
||||
pub fn read_text(file_path: &str) -> Result<Vec<DocumentBlock>, DocumentError> {
|
||||
let content = std::fs::read_to_string(file_path)?;
|
||||
Ok(parse_text_content(&content))
|
||||
}
|
||||
|
||||
/// Parse a string as text content, splitting by blank lines into paragraphs.
|
||||
pub fn parse_text_content(content: &str) -> Vec<DocumentBlock> {
|
||||
let mut blocks = Vec::new();
|
||||
|
||||
for paragraph in content.split("\n\n") {
|
||||
let text = paragraph.trim().to_string();
|
||||
if text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
blocks.push(DocumentBlock::Paragraph {
|
||||
id: Uuid::new_v4().to_string(),
|
||||
text,
|
||||
});
|
||||
}
|
||||
|
||||
blocks
|
||||
}
|
||||
|
||||
/// Return line count and word count for a text string.
|
||||
pub fn text_stats(content: &str) -> TextStats {
|
||||
let line_count = content.lines().count() as u32;
|
||||
let word_count = content.split_whitespace().count() as u32;
|
||||
TextStats {
|
||||
line_count,
|
||||
word_count,
|
||||
}
|
||||
}
|
||||
|
||||
/// Line and word counts for a piece of text, as returned by [`text_stats`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct TextStats {
    /// Number of lines in the text.
    pub line_count: u32,
    /// Number of whitespace-separated words in the text.
    pub word_count: u32,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Return the text of a paragraph block, failing the test for any other
    /// kind of block so assertions can never be skipped silently.
    fn paragraph_text(block: &DocumentBlock) -> &str {
        match block {
            DocumentBlock::Paragraph { text, .. } => text.as_str(),
            _ => panic!("expected paragraph"),
        }
    }

    #[test]
    fn test_empty_text() {
        let blocks = parse_text_content("");
        assert!(blocks.is_empty());
    }

    #[test]
    fn test_single_paragraph() {
        let blocks = parse_text_content("Hello world.");
        assert_eq!(blocks.len(), 1);
        assert_eq!(paragraph_text(&blocks[0]), "Hello world.");
    }

    #[test]
    fn test_multiple_paragraphs() {
        let blocks =
            parse_text_content("First paragraph.\n\nSecond paragraph.\n\nThird paragraph.");
        assert_eq!(blocks.len(), 3);
        // Check content and order, not just the count.
        assert_eq!(paragraph_text(&blocks[0]), "First paragraph.");
        assert_eq!(paragraph_text(&blocks[1]), "Second paragraph.");
        assert_eq!(paragraph_text(&blocks[2]), "Third paragraph.");
    }

    #[test]
    fn test_trim_whitespace() {
        let blocks = parse_text_content(" spaced text \n\n another ");
        assert_eq!(blocks.len(), 2);
        assert_eq!(paragraph_text(&blocks[0]), "spaced text");
        assert_eq!(paragraph_text(&blocks[1]), "another");
    }

    #[test]
    fn test_stats() {
        let content = "Hello world\nThis is a test.\n\nThird line.";
        let stats = text_stats(content);
        assert_eq!(stats.line_count, 4); // 3 text lines + 1 empty
        assert_eq!(stats.word_count, 8);
    }

    #[test]
    fn test_file_not_found() {
        let result = read_text("/nonexistent/file.txt");
        assert!(result.is_err());
    }
}
|
||||
Loading…
x
Reference in New Issue
Block a user