feat: case-insensitive search for Markdown blocks and TXT text

This commit is contained in:
wangdl 2026-05-30 21:58:59 +08:00
parent b5f8e273a9
commit 65bc52bfd7

View File

@ -1,5 +1,9 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::blocks::DocumentBlock;
/// Number of bytes of surrounding context taken on each side of a match when
/// building a result snippet (clipped to the bounds of the searched text).
const SNIPPET_RADIUS: usize = 40;
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult { pub struct SearchResult {
pub block_id: String, pub block_id: String,
@ -8,3 +12,179 @@ pub struct SearchResult {
pub match_start: usize, pub match_start: usize,
pub match_end: usize, pub match_end: usize,
} }
/// Search within a list of DocumentBlock (Markdown).
///
/// The search is case-insensitive and reports every non-overlapping occurrence
/// of `query` in the text of each block, in document order. An empty `query`
/// yields no results.
///
/// For each hit, `snippet` is a window of the block text around the match and
/// `match_start` / `match_end` are byte offsets of the match *inside that
/// snippet*. `line_number` is always `None` for block results.
pub fn search_blocks(blocks: &[DocumentBlock], query: &str) -> Vec<SearchResult> {
    if query.is_empty() {
        return Vec::new();
    }
    let query = query.to_lowercase();
    let mut results = Vec::new();
    for block in blocks {
        let (block_id, text) = block_text(block);
        if text.is_empty() {
            continue;
        }
        for (start, end) in find_case_insensitive(&text, &query) {
            let (snippet, snippet_start) = snippet_around(&text, start, end);
            results.push(SearchResult {
                block_id: block_id.to_string(),
                line_number: None,
                snippet,
                match_start: start - snippet_start,
                match_end: end - snippet_start,
            });
        }
    }
    results
}

/// Search within a plain text string (TXT).
///
/// The search is case-insensitive and reports every non-overlapping occurrence
/// of `query` in `content`, in order. An empty `query` yields no results.
///
/// Each hit carries the 1-based line number on which the match starts (also
/// encoded in `block_id` as `line-N`), a snippet of the surrounding text, and
/// the byte offsets of the match inside that snippet.
pub fn search_text(content: &str, query: &str) -> Vec<SearchResult> {
    if query.is_empty() {
        return Vec::new();
    }
    let query = query.to_lowercase();
    let mut results = Vec::new();
    // Matches arrive in ascending order, so newlines are counted incrementally
    // instead of rescanning the content from the beginning for every hit.
    let mut line_number: u32 = 1;
    let mut counted_to = 0;
    for (start, end) in find_case_insensitive(content, &query) {
        for &byte in &content.as_bytes()[counted_to..start] {
            if byte == b'\n' {
                line_number = line_number.saturating_add(1);
            }
        }
        counted_to = start;
        let (snippet, snippet_start) = snippet_around(content, start, end);
        results.push(SearchResult {
            block_id: format!("line-{line_number}"),
            line_number: Some(line_number),
            snippet,
            match_start: start - snippet_start,
            match_end: end - snippet_start,
        });
    }
    results
}

/// Finds every non-overlapping, case-insensitive occurrence of `needle_lower`
/// (which must already be lowercased) in `text`.
///
/// Returns `(start, end)` byte ranges into the ORIGINAL `text`, always aligned
/// to character boundaries and in ascending order.
fn find_case_insensitive(text: &str, needle_lower: &str) -> Vec<(usize, usize)> {
    let mut matches = Vec::new();
    if needle_lower.is_empty() {
        // An empty needle matches everywhere and would never advance the scan.
        return matches;
    }
    let lower = text.to_lowercase();
    // Lowercasing may change the byte length of non-ASCII characters, so
    // offsets found in `lower` cannot be used on `text` directly. For pure
    // ASCII text the offsets are identical and no mapping table is needed.
    let bounds = (!text.is_ascii()).then(|| lowercase_char_bounds(text));
    let mut from = 0;
    while let Some(pos) = lower[from..].find(needle_lower) {
        let lower_start = from + pos;
        let lower_end = lower_start + needle_lower.len();
        let range = match &bounds {
            None => (lower_start, lower_end),
            Some(bounds) => {
                // Entry of the original character whose lowercase form
                // contains the first matched byte.
                let containing = bounds.partition_point(|&(lo, _)| lo <= lower_start);
                // First original character that begins at or after the end of
                // the match in the lowercased text.
                let following = bounds.partition_point(|&(lo, _)| lo < lower_end);
                let start = bounds[containing.saturating_sub(1)].1;
                let end = bounds
                    .get(following)
                    .map_or(text.len(), |&(_, orig)| orig);
                (start, end)
            }
        };
        matches.push(range);
        from = lower_end;
    }
    matches
}

/// Builds a table mapping character boundaries of `text.to_lowercase()` to the
/// corresponding boundaries in `text`.
///
/// Each entry is `(offset in lowercased text, offset in original text)` for
/// the start of one original character; a final sentinel entry marks the end
/// of both strings.
fn lowercase_char_bounds(text: &str) -> Vec<(usize, usize)> {
    let mut lower_offset = 0;
    let mut bounds: Vec<(usize, usize)> = text
        .char_indices()
        .map(|(orig_offset, ch)| {
            let entry = (lower_offset, orig_offset);
            lower_offset += ch.to_lowercase().map(char::len_utf8).sum::<usize>();
            entry
        })
        .collect();
    bounds.push((lower_offset, text.len()));
    bounds
}

/// Cuts a snippet of roughly `SNIPPET_RADIUS` bytes of context on each side of
/// the match `start..end` out of `text`.
///
/// The window is widened to the nearest character boundaries so that slicing
/// never splits a multi-byte character. Returns the snippet together with the
/// byte offset in `text` at which it begins.
fn snippet_around(text: &str, start: usize, end: usize) -> (String, usize) {
    let mut from = start.saturating_sub(SNIPPET_RADIUS);
    // Offset 0 is always a boundary, so this cannot underflow.
    while !text.is_char_boundary(from) {
        from -= 1;
    }
    let mut to = end.saturating_add(SNIPPET_RADIUS).min(text.len());
    // `text.len()` is always a boundary, so this cannot run past the end.
    while !text.is_char_boundary(to) {
        to += 1;
    }
    (text[from..to].to_string(), from)
}
/// Returns the identifier of `block` together with the text that should be
/// searched for it.
///
/// Headings, paragraphs, quotes and code blocks contribute their text as-is;
/// list items and table cells are flattened into one space-separated string;
/// images contribute their alt text (empty when absent). Horizontal rules
/// have nothing to search and yield an empty id and empty text.
fn block_text(block: &DocumentBlock) -> (&str, String) {
    match block {
        DocumentBlock::Heading { id, text, .. }
        | DocumentBlock::Paragraph { id, text, .. }
        | DocumentBlock::Quote { id, text, .. } => (id, text.clone()),
        DocumentBlock::CodeBlock { id, code, .. } => (id, code.clone()),
        DocumentBlock::List { id, items, .. } => (id, items.join(" ")),
        DocumentBlock::Table { id, headers, rows } => {
            // Header cells first, then every row, each separated by a space.
            let cells = rows.iter().fold(headers.join(" "), |mut joined, row| {
                joined.push(' ');
                joined.push_str(&row.join(" "));
                joined
            });
            (id, cells)
        }
        DocumentBlock::Image { id, alt, .. } => {
            let alt_text = alt.as_deref().unwrap_or("").to_owned();
            (id, alt_text)
        }
        DocumentBlock::HorizontalRule { .. } => ("", String::new()),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::markdown;

    /// A plain lowercase query is found in parsed Markdown.
    #[test]
    fn test_search_blocks_basic() {
        let md = "# Hello\n\nThis is a test document.\n\nMore content here.";
        let blocks = markdown::parse_markdown(md).unwrap();
        let results = search_blocks(&blocks, "test");
        assert!(!results.is_empty());
        assert!(results[0].snippet.to_lowercase().contains("test"));
    }

    /// Query casing must not affect which matches are found.
    #[test]
    fn test_search_blocks_case_insensitive() {
        let md = "Hello WORLD";
        let blocks = markdown::parse_markdown(md).unwrap();
        let lower = search_blocks(&blocks, "world");
        let upper = search_blocks(&blocks, "WORLD");
        // Without this check the comparison below would also pass if both
        // searches found nothing at all.
        assert!(!lower.is_empty());
        assert_eq!(lower.len(), upper.len());
    }

    /// A query that does not occur yields no results.
    #[test]
    fn test_search_blocks_no_match() {
        let md = "# Title\n\nParagraph.";
        let blocks = markdown::parse_markdown(md).unwrap();
        let results = search_blocks(&blocks, "nonexistent");
        assert!(results.is_empty());
    }

    /// An empty query never matches.
    #[test]
    fn test_search_blocks_empty_query() {
        let md = "Some text.";
        let blocks = markdown::parse_markdown(md).unwrap();
        let results = search_blocks(&blocks, "");
        assert!(results.is_empty());
    }

    /// A match in plain text reports its 1-based line and a usable highlight range.
    #[test]
    fn test_search_text_basic() {
        let content = "Line one\nLine two\nLine three";
        let results = search_text(content, "two");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].line_number, Some(2));
        let hit = &results[0];
        assert_eq!(&hit.snippet[hit.match_start..hit.match_end], "two");
    }

    /// Every non-overlapping occurrence is reported.
    #[test]
    fn test_search_text_multiple_matches() {
        let content = "hello world hello world hello world";
        let results = search_text(content, "hello");
        assert_eq!(results.len(), 3);
    }

    /// A query that does not occur yields no results.
    #[test]
    fn test_search_text_no_match() {
        let content = "just text";
        let results = search_text(content, "xyz");
        assert!(results.is_empty());
    }

    /// An empty query never matches.
    #[test]
    fn test_search_text_empty_query() {
        let content = "text";
        let results = search_text(content, "");
        assert!(results.is_empty());
    }
}