diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..35049cb --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[alias] +xtask = "run --package xtask --" diff --git a/README.md b/README.md index cdf169f..d645df8 100644 --- a/README.md +++ b/README.md @@ -151,16 +151,12 @@ EPUB 不是 Word 那类复杂办公格式,它本质上更接近打包的 Web ## 7. 推荐 Rust 依赖包 -### 7.1 Core 基础依赖 +### 7.1 Core 基础依赖(已使用) ```toml serde = { version = "1", features = ["derive"] } serde_json = "1" -thiserror = "2" -anyhow = "1" -tracing = "0.1" -uuid = { version = "1", features = ["v4", "serde"] } -time = { version = "0.3", features = ["serde"] } +uuid = { version = "1", features = ["v4"] } ``` 用途: @@ -168,14 +164,10 @@ time = { version = "0.3", features = ["serde"] } | 包 | 用途 | | ------------ | --------------------- | | `serde` | 数据结构序列化 / 反序列化 | -| `serde_json` | JSON 协议、调试输出、事件序列化 | -| `thiserror` | 定义稳定错误类型 | -| `anyhow` | CLI / demo / 内部错误快速处理 | -| `tracing` | 日志与诊断 | -| `uuid` | 事件 ID、session ID | -| `time` | 时间戳、阅读事件时间 | +| `serde_json` | JSON 协议、事件序列化 | +| `uuid` | block ID 生成(v4 随机 UUID) | -Serde 是 Rust 生态常用的序列化 / 反序列化框架,适合把 Rust 数据结构转换成 JSON 传给 Swift/Kotlin。 +后续可能需要 `thiserror`(derive Error trait)、`tracing`(日志诊断)、`time`(时间戳序列化),按需引入即可。 --- @@ -227,8 +219,9 @@ pub enum DocumentBlock { List { id: String, ordered: bool, items: Vec }, CodeBlock { id: String, language: Option, code: String }, Quote { id: String, text: String }, - Table { id: String, rows: Vec> }, + Table { id: String, headers: Vec, rows: Vec> }, Image { id: String, src: String, alt: Option }, + HorizontalRule { id: String }, } ``` @@ -457,10 +450,9 @@ zhixi-document-runtime/ │ ├── supported-formats.md │ ├── event-protocol.md │ ├── reading-position-model.md -│ ├── note-anchor-model.md +│ ├── pdf-strategy.md │ ├── ios-integration.md -│ ├── android-integration.md -│ └── roadmap.md +│ └── app-rust-bridge.md │ └── scripts/ ├── build-ios.sh diff --git a/bindings/ios/ZxDocumentRuntime.xcframework/Info.plist 
b/bindings/ios/ZxDocumentRuntime.xcframework/Info.plist index 2dce6ca..612f37c 100644 --- a/bindings/ios/ZxDocumentRuntime.xcframework/Info.plist +++ b/bindings/ios/ZxDocumentRuntime.xcframework/Info.plist @@ -4,20 +4,6 @@ AvailableLibraries - - BinaryPath - libzx_document_ffi.a - LibraryIdentifier - ios-arm64 - LibraryPath - libzx_document_ffi.a - SupportedArchitectures - - arm64 - - SupportedPlatform - ios - BinaryPath libzx_document_ffi.a @@ -34,6 +20,20 @@ SupportedPlatformVariant simulator + + BinaryPath + libzx_document_ffi.a + LibraryIdentifier + ios-arm64 + LibraryPath + libzx_document_ffi.a + SupportedArchitectures + + arm64 + + SupportedPlatform + ios + CFBundlePackageType XFWK diff --git a/bindings/ios/ZxDocumentRuntime.xcframework/ios-arm64-simulator/libzx_document_ffi.a b/bindings/ios/ZxDocumentRuntime.xcframework/ios-arm64-simulator/libzx_document_ffi.a index 1cae318..2025311 100644 Binary files a/bindings/ios/ZxDocumentRuntime.xcframework/ios-arm64-simulator/libzx_document_ffi.a and b/bindings/ios/ZxDocumentRuntime.xcframework/ios-arm64-simulator/libzx_document_ffi.a differ diff --git a/bindings/ios/ZxDocumentRuntime.xcframework/ios-arm64/libzx_document_ffi.a b/bindings/ios/ZxDocumentRuntime.xcframework/ios-arm64/libzx_document_ffi.a index 08d635c..45688df 100644 Binary files a/bindings/ios/ZxDocumentRuntime.xcframework/ios-arm64/libzx_document_ffi.a and b/bindings/ios/ZxDocumentRuntime.xcframework/ios-arm64/libzx_document_ffi.a differ diff --git a/bindings/ios/device/libzx_document_ffi.a b/bindings/ios/device/libzx_document_ffi.a index 08d635c..45688df 100644 Binary files a/bindings/ios/device/libzx_document_ffi.a and b/bindings/ios/device/libzx_document_ffi.a differ diff --git a/bindings/ios/generated/zx_document.swift b/bindings/ios/generated/zx_document.swift index 238f4cd..fa97011 100644 --- a/bindings/ios/generated/zx_document.swift +++ b/bindings/ios/generated/zx_document.swift @@ -1723,6 +1723,31 @@ fileprivate struct 
FfiConverterSequenceString: FfiConverterRustBuffer { } } +#if swift(>=5.8) +@_documentation(visibility: private) +#endif +fileprivate struct FfiConverterSequenceTypeSearchResult: FfiConverterRustBuffer { + typealias SwiftType = [SearchResult] + + public static func write(_ value: [SearchResult], into buf: inout [UInt8]) { + let len = Int32(value.count) + writeInt(&buf, len) + for item in value { + FfiConverterTypeSearchResult.write(item, into: &buf) + } + } + + public static func read(from buf: inout (data: Data, offset: Data.Index)) throws -> [SearchResult] { + let len: Int32 = try readInt(&buf) + var seq = [SearchResult]() + seq.reserveCapacity(Int(len)) + for _ in 0 ..< len { + seq.append(try FfiConverterTypeSearchResult.read(from: &buf)) + } + return seq + } +} + #if swift(>=5.8) @_documentation(visibility: private) #endif @@ -1748,6 +1773,31 @@ fileprivate struct FfiConverterSequenceTypeDocumentBlock: FfiConverterRustBuffer } } +#if swift(>=5.8) +@_documentation(visibility: private) +#endif +fileprivate struct FfiConverterSequenceTypeReadingEvent: FfiConverterRustBuffer { + typealias SwiftType = [ReadingEvent] + + public static func write(_ value: [ReadingEvent], into buf: inout [UInt8]) { + let len = Int32(value.count) + writeInt(&buf, len) + for item in value { + FfiConverterTypeReadingEvent.write(item, into: &buf) + } + } + + public static func read(from buf: inout (data: Data, offset: Data.Index)) throws -> [ReadingEvent] { + let len: Int32 = try readInt(&buf) + var seq = [ReadingEvent]() + seq.reserveCapacity(Int(len)) + for _ in 0 ..< len { + seq.append(try FfiConverterTypeReadingEvent.read(from: &buf)) + } + return seq + } +} + #if swift(>=5.8) @_documentation(visibility: private) #endif @@ -1772,6 +1822,20 @@ fileprivate struct FfiConverterSequenceSequenceString: FfiConverterRustBuffer { return seq } } +public func clearExportedEvents(count: UInt32) {try! 
rustCall() { + uniffi_zx_document_ffi_fn_func_clear_exported_events( + FfiConverterUInt32.lower(count),$0 + ) +} +} +public func createNoteAnchor(materialId: String, position: ReadingPosition?) -> NoteAnchor { + return try! FfiConverterTypeNoteAnchor_lift(try! rustCall() { + uniffi_zx_document_ffi_fn_func_create_note_anchor( + FfiConverterString.lower(materialId), + FfiConverterOptionTypeReadingPosition.lower(position),$0 + ) +}) +} public func detectMaterialType(filePath: String)throws -> MaterialType { return try FfiConverterTypeMaterialType_lift(try rustCallWithError(FfiConverterTypeDocumentError_lift) { uniffi_zx_document_ffi_fn_func_detect_material_type( @@ -1779,6 +1843,12 @@ public func detectMaterialType(filePath: String)throws -> MaterialType { ) }) } +public func exportPendingEvents() -> [ReadingEvent] { + return try! FfiConverterSequenceTypeReadingEvent.lift(try! rustCall() { + uniffi_zx_document_ffi_fn_func_export_pending_events($0 + ) +}) +} public func parseMarkdown(content: String)throws -> [DocumentBlock] { return try FfiConverterSequenceTypeDocumentBlock.lift(try rustCallWithError(FfiConverterTypeDocumentError_lift) { uniffi_zx_document_ffi_fn_func_parse_markdown( @@ -1793,6 +1863,12 @@ public func parseText(content: String)throws -> [DocumentBlock] { ) }) } +public func pushReadingEvent(event: ReadingEvent) {try! rustCall() { + uniffi_zx_document_ffi_fn_func_push_reading_event( + FfiConverterTypeReadingEvent_lower(event),$0 + ) +} +} public func readImageMeta(filePath: String)throws -> ImageMeta { return try FfiConverterTypeImageMeta_lift(try rustCallWithError(FfiConverterTypeDocumentError_lift) { uniffi_zx_document_ffi_fn_func_read_image_meta( @@ -1807,6 +1883,29 @@ public func readTextStats(filePath: String)throws -> TextStats { ) }) } +public func searchMarkdownBlocks(blocks: [DocumentBlock], query: String) -> [SearchResult] { + return try! FfiConverterSequenceTypeSearchResult.lift(try! 
rustCall() { + uniffi_zx_document_ffi_fn_func_search_markdown_blocks( + FfiConverterSequenceTypeDocumentBlock.lower(blocks), + FfiConverterString.lower(query),$0 + ) +}) +} +public func searchTextContent(content: String, query: String) -> [SearchResult] { + return try! FfiConverterSequenceTypeSearchResult.lift(try! rustCall() { + uniffi_zx_document_ffi_fn_func_search_text_content( + FfiConverterString.lower(content), + FfiConverterString.lower(query),$0 + ) +}) +} +public func updateReadingPosition(materialId: String, position: ReadingPosition) {try! rustCall() { + uniffi_zx_document_ffi_fn_func_update_reading_position( + FfiConverterString.lower(materialId), + FfiConverterTypeReadingPosition_lower(position),$0 + ) +} +} private enum InitializationResult { case ok @@ -1823,21 +1922,42 @@ private let initializationResult: InitializationResult = { if bindings_contract_version != scaffolding_contract_version { return InitializationResult.contractVersionMismatch } + if (uniffi_zx_document_ffi_checksum_func_clear_exported_events() != 48081) { + return InitializationResult.apiChecksumMismatch + } + if (uniffi_zx_document_ffi_checksum_func_create_note_anchor() != 12864) { + return InitializationResult.apiChecksumMismatch + } if (uniffi_zx_document_ffi_checksum_func_detect_material_type() != 55020) { return InitializationResult.apiChecksumMismatch } + if (uniffi_zx_document_ffi_checksum_func_export_pending_events() != 40963) { + return InitializationResult.apiChecksumMismatch + } if (uniffi_zx_document_ffi_checksum_func_parse_markdown() != 11780) { return InitializationResult.apiChecksumMismatch } if (uniffi_zx_document_ffi_checksum_func_parse_text() != 32792) { return InitializationResult.apiChecksumMismatch } + if (uniffi_zx_document_ffi_checksum_func_push_reading_event() != 28701) { + return InitializationResult.apiChecksumMismatch + } if (uniffi_zx_document_ffi_checksum_func_read_image_meta() != 62824) { return InitializationResult.apiChecksumMismatch } if 
(uniffi_zx_document_ffi_checksum_func_read_text_stats() != 43426) { return InitializationResult.apiChecksumMismatch } + if (uniffi_zx_document_ffi_checksum_func_search_markdown_blocks() != 20719) { + return InitializationResult.apiChecksumMismatch + } + if (uniffi_zx_document_ffi_checksum_func_search_text_content() != 35708) { + return InitializationResult.apiChecksumMismatch + } + if (uniffi_zx_document_ffi_checksum_func_update_reading_position() != 19288) { + return InitializationResult.apiChecksumMismatch + } return InitializationResult.ok }() diff --git a/bindings/ios/simulator/libzx_document_ffi.a b/bindings/ios/simulator/libzx_document_ffi.a index 1cae318..2025311 100644 Binary files a/bindings/ios/simulator/libzx_document_ffi.a and b/bindings/ios/simulator/libzx_document_ffi.a differ diff --git a/crates/xtask/src/main.rs b/crates/xtask/src/main.rs index abcca5f..3f39f82 100644 --- a/crates/xtask/src/main.rs +++ b/crates/xtask/src/main.rs @@ -1,3 +1,49 @@ +use std::env; +use std::process::Command; + fn main() { - println!("zhixi-document-runtime xtask"); + let args: Vec = env::args().skip(1).collect(); + match args.first().map(String::as_str) { + Some("test") => run_cargo("test", &[]), + Some("build-ios") => { + let script = project_root().join("scripts/build-ios.sh"); + let status = Command::new("bash") + .arg(script) + .status() + .expect("failed to run build-ios.sh"); + std::process::exit(status.code().unwrap_or(1)); + } + Some("fixtures") => { + println!("fixtures/"); + println!(" markdown/sample.md — all block types (heading, paragraph, list, table, code, quote, hr)"); + println!(" text/sample.txt — multi-paragraph plain text"); + println!(" images/test-red.png — 1×1 red pixel PNG"); + } + _ => { + println!("zhixi-document-runtime xtask"); + println!(); + println!("Usage: cargo run -p xtask -- "); + println!(); + println!("Commands:"); + println!(" test Run all Rust tests (cargo test)"); + println!(" build-ios Build iOS XCFramework + Swift bindings"); + 
println!(" fixtures List available fixture files"); + } + } +} + +fn project_root() -> std::path::PathBuf { + let manifest_dir = env!("CARGO_MANIFEST_DIR"); + std::path::Path::new(manifest_dir).parent().unwrap().parent().unwrap().to_path_buf() +} + +fn run_cargo(cmd: &str, args: &[&str]) { + let mut child = Command::new("cargo") + .arg(cmd) + .args(args) + .current_dir(project_root()) + .spawn() + .expect("failed to start cargo"); + let status = child.wait().expect("failed to wait on cargo"); + std::process::exit(status.code().unwrap_or(1)); } diff --git a/crates/zx_document_core/src/epub.rs b/crates/zx_document_core/src/epub.rs index e69de29..0499f43 100644 --- a/crates/zx_document_core/src/epub.rs +++ b/crates/zx_document_core/src/epub.rs @@ -0,0 +1,4 @@ +// M5: EPUB structure parsing (OPF, spine, nav, chapter list). +// Rust Core will parse the EPUB container and expose chapter-level metadata. +// Chapter HTML rendering is delegated to the host app's WebView / native HTML. +// See #25 DOC-501 and #26 DOC-502 for design and implementation plan. diff --git a/crates/zx_document_core/src/events.rs b/crates/zx_document_core/src/events.rs index beea97f..56f4a93 100644 --- a/crates/zx_document_core/src/events.rs +++ b/crates/zx_document_core/src/events.rs @@ -1,7 +1,50 @@ +use std::sync::Mutex; + use serde::{Deserialize, Serialize}; use crate::progress::ReadingPosition; +// Global event buffer, protected by a Mutex for thread safety. +static EVENT_BUFFER: Mutex> = Mutex::new(Vec::new()); + +/// Push a reading event into the global buffer. +pub fn push_reading_event(event: ReadingEvent) { + if let Ok(mut buf) = EVENT_BUFFER.lock() { + buf.push(event); + } +} + +/// Record a position change as a PositionChanged event. 
+pub fn update_reading_position(material_id: &str, position: ReadingPosition) { + let event = ReadingEvent::PositionChanged { + material_id: material_id.to_string(), + position, + timestamp_ms: now_ms(), + }; + push_reading_event(event); +} + +/// Export all pending events without clearing. +pub fn export_pending_events() -> Vec { + EVENT_BUFFER.lock().map(|buf| buf.clone()).unwrap_or_default() +} + +/// Remove the first `count` events after successful upload. +pub fn clear_exported_events(count: usize) { + if let Ok(mut buf) = EVENT_BUFFER.lock() { + let n = count.min(buf.len()); + buf.drain(..n); + } +} + +fn now_ms() -> i64 { + use std::time::SystemTime; + SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .map(|d| d.as_millis() as i64) + .unwrap_or(0) +} + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "type")] pub enum ReadingEvent { @@ -34,6 +77,14 @@ pub enum ReadingEvent { #[cfg(test)] mod tests { use super::*; + use std::sync::Mutex; + + // Serialize event buffer tests to prevent races on the global EVENT_BUFFER. 
+ static TEST_LOCK: Mutex<()> = Mutex::new(()); + + fn setup() { + clear_exported_events(usize::MAX); + } #[test] fn test_material_opened_serde() { @@ -107,4 +158,62 @@ mod tests { let back: ReadingEvent = serde_json::from_str(&json).unwrap(); assert_eq!(back, e); } + + #[test] + fn test_push_and_export() { + let _guard = TEST_LOCK.lock().unwrap(); + setup(); + push_reading_event(ReadingEvent::MaterialOpened { + material_id: "m1".into(), + timestamp_ms: 1000, + }); + push_reading_event(ReadingEvent::MarkedAsRead { + material_id: "m1".into(), + timestamp_ms: 2000, + }); + let events = export_pending_events(); + assert_eq!(events.len(), 2); + } + + #[test] + fn test_clear_exported() { + let _guard = TEST_LOCK.lock().unwrap(); + setup(); + push_reading_event(ReadingEvent::MaterialOpened { + material_id: "m1".into(), + timestamp_ms: 1000, + }); + push_reading_event(ReadingEvent::MaterialOpened { + material_id: "m2".into(), + timestamp_ms: 2000, + }); + push_reading_event(ReadingEvent::MaterialOpened { + material_id: "m3".into(), + timestamp_ms: 3000, + }); + clear_exported_events(2); + let events = export_pending_events(); + assert_eq!(events.len(), 1); + } + + #[test] + fn test_update_reading_position() { + let _guard = TEST_LOCK.lock().unwrap(); + setup(); + update_reading_position( + "m1", + ReadingPosition::Markdown { block_id: "h1".into(), scroll_progress: 0.5 }, + ); + let events = export_pending_events(); + assert_eq!(events.len(), 1); + assert!(matches!(events[0], ReadingEvent::PositionChanged { .. 
})); + } + + #[test] + fn test_empty_export() { + let _guard = TEST_LOCK.lock().unwrap(); + setup(); + let events = export_pending_events(); + assert!(events.is_empty()); + } } diff --git a/crates/zx_document_core/src/image_meta.rs b/crates/zx_document_core/src/image_meta.rs index feb70cb..8d10a15 100644 --- a/crates/zx_document_core/src/image_meta.rs +++ b/crates/zx_document_core/src/image_meta.rs @@ -15,18 +15,25 @@ pub struct ImageMeta { /// Read image metadata from a file path. pub fn read_image_meta(file_path: &str) -> Result { - let path = Path::new(file_path); - let file_size = std::fs::metadata(file_path) .map(|m| m.len())?; - let format = path - .extension() - .and_then(|e| e.to_str()) - .unwrap_or("unknown") - .to_lowercase(); + let reader = image::ImageReader::open(file_path).map_err(|e| { + DocumentError::ParseError(format!("image open failed: {e}")) + })?; - let img = image::open(file_path).map_err(|e| { + let format = reader + .format() + .map(image_format_name) + .unwrap_or_else(|| { + Path::new(file_path) + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("unknown") + .to_lowercase() + }); + + let img = reader.decode().map_err(|e| { DocumentError::ParseError(format!("image decode failed: {e}")) })?; @@ -40,6 +47,20 @@ pub fn read_image_meta(file_path: &str) -> Result { }) } +fn image_format_name(f: image::ImageFormat) -> String { + match f { + image::ImageFormat::Png => "png", + image::ImageFormat::Jpeg => "jpeg", + image::ImageFormat::Gif => "gif", + image::ImageFormat::WebP => "webp", + image::ImageFormat::Bmp => "bmp", + image::ImageFormat::Tiff => "tiff", + image::ImageFormat::Ico => "ico", + _ => "unknown", + } + .to_string() +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/zx_document_core/src/pdf.rs b/crates/zx_document_core/src/pdf.rs index e69de29..cf483cf 100644 --- a/crates/zx_document_core/src/pdf.rs +++ b/crates/zx_document_core/src/pdf.rs @@ -0,0 +1,3 @@ +// M4: PDF is handled via platform preview (iOS PDFKit / 
QuickLook, Android system). +// Rust Core only provides the unified ReadingPosition model (see progress.rs). +// Full PDF parsing (pdfium, text extraction) is deferred to a later milestone. diff --git a/crates/zx_document_core/src/search.rs b/crates/zx_document_core/src/search.rs index 34e11a0..1f86de2 100644 --- a/crates/zx_document_core/src/search.rs +++ b/crates/zx_document_core/src/search.rs @@ -9,8 +9,8 @@ pub struct SearchResult { pub block_id: String, pub line_number: Option, pub snippet: String, - pub match_start: usize, - pub match_end: usize, + pub match_start: u64, + pub match_end: u64, } /// Search within a list of DocumentBlock (Markdown). @@ -44,8 +44,8 @@ pub fn search_blocks(blocks: &[DocumentBlock], query: &str) -> Vec block_id: block_id.to_string(), line_number: None, snippet, - match_start: abs_start - snippet_start, - match_end: abs_end - snippet_start, + match_start: (abs_start - snippet_start) as u64, + match_end: (abs_end - snippet_start) as u64, }); start = abs_end; @@ -87,8 +87,8 @@ pub fn search_text(content: &str, query: &str) -> Vec { block_id: format!("line-{line_number}"), line_number: Some(line_number), snippet, - match_start: abs_start - snippet_start, - match_end: abs_end - snippet_start, + match_start: (abs_start - snippet_start) as u64, + match_end: (abs_end - snippet_start) as u64, }); start = abs_end; diff --git a/crates/zx_document_ffi/src/lib.rs b/crates/zx_document_ffi/src/lib.rs index 3406053..1666404 100644 --- a/crates/zx_document_ffi/src/lib.rs +++ b/crates/zx_document_ffi/src/lib.rs @@ -1,3 +1,7 @@ +// FFI functions are called from generated UniFFI bindings (C-ABI), +// so Rust's dead_code analysis doesn't see the calls. 
+#![allow(dead_code)] + uniffi::setup_scaffolding!(); use std::sync::Arc; @@ -5,6 +9,10 @@ use std::sync::Arc; pub use zx_document_core::material_type::{MaterialType, PreviewMode}; pub use zx_document_core::image_meta::ImageMeta; pub use zx_document_core::text::TextStats; +pub use zx_document_core::search::SearchResult; +pub use zx_document_core::anchors::NoteAnchor; +pub use zx_document_core::progress::ReadingPosition; +pub use zx_document_core::events::ReadingEvent; use zx_document_core::blocks as core_blocks; @@ -113,3 +121,62 @@ fn parse_markdown(content: String) -> Result, DocumentError> })?; Ok(blocks.into_iter().map(Into::into).collect()) } + +fn search_markdown_blocks(blocks: Vec, query: String) -> Vec { + let core_blocks: Vec = blocks.into_iter().map(core_block_from_ffi).collect(); + zx_document_core::search::search_blocks(&core_blocks, &query) +} + +fn search_text_content(content: String, query: String) -> Vec { + zx_document_core::search::search_text(&content, &query) +} + +fn create_note_anchor(material_id: String, position: Option) -> NoteAnchor { + zx_document_core::anchors::NoteAnchor::from_position(&material_id, position.as_ref()) +} + +fn push_reading_event(event: ReadingEvent) { + zx_document_core::events::push_reading_event(event) +} + +fn update_reading_position(material_id: String, position: ReadingPosition) { + zx_document_core::events::update_reading_position(&material_id, position) +} + +fn export_pending_events() -> Vec { + zx_document_core::events::export_pending_events() +} + +fn clear_exported_events(count: u32) { + zx_document_core::events::clear_exported_events(count as usize) +} + +// Reverse conversion: FFI DocumentBlock → core DocumentBlock, used by search. 
+fn core_block_from_ffi(block: DocumentBlock) -> core_blocks::DocumentBlock { + match block { + DocumentBlock::Heading(id, level, text) => { + core_blocks::DocumentBlock::Heading { id, level, text } + } + DocumentBlock::Paragraph(id, text) => { + core_blocks::DocumentBlock::Paragraph { id, text } + } + DocumentBlock::List(id, ordered, items) => { + core_blocks::DocumentBlock::List { id, ordered, items } + } + DocumentBlock::CodeBlock(id, language, code) => { + core_blocks::DocumentBlock::CodeBlock { id, language, code } + } + DocumentBlock::Quote(id, text) => { + core_blocks::DocumentBlock::Quote { id, text } + } + DocumentBlock::Table(id, headers, rows) => { + core_blocks::DocumentBlock::Table { id, headers, rows } + } + DocumentBlock::ImageBlock(id, src, alt) => { + core_blocks::DocumentBlock::Image { id, src, alt } + } + DocumentBlock::HorizontalRule(id) => { + core_blocks::DocumentBlock::HorizontalRule { id } + } + } +} diff --git a/crates/zx_document_ffi/src/zx_document.udl b/crates/zx_document_ffi/src/zx_document.udl index ac07496..71f172f 100644 --- a/crates/zx_document_ffi/src/zx_document.udl +++ b/crates/zx_document_ffi/src/zx_document.udl @@ -13,6 +13,18 @@ namespace zx_document { [Throws=DocumentError] sequence parse_text([ByRef] string content); + + sequence search_markdown_blocks(sequence blocks, [ByRef] string query); + + sequence search_text_content([ByRef] string content, [ByRef] string query); + + NoteAnchor create_note_anchor([ByRef] string material_id, ReadingPosition? 
position); + + // Reading event buffer + void push_reading_event(ReadingEvent event); + void update_reading_position([ByRef] string material_id, ReadingPosition position); + sequence export_pending_events(); + void clear_exported_events(u32 count); }; [Error] diff --git a/docs/pdf-strategy.md b/docs/pdf-strategy.md new file mode 100644 index 0000000..7314277 --- /dev/null +++ b/docs/pdf-strategy.md @@ -0,0 +1,184 @@ +# PDF 阅读方案评估 + +## 概述 + +PDF 是知习支持的核心文件格式之一。本文档明确 PDF 在各平台的处理策略、技术选型边界和后续路线图。 + +## 核心结论 + +1. **iOS 第一版继续使用 PDFKit / QuickLook** — 系统内置,无需额外依赖 +2. **Rust 暂不接入 PDFium** — 增加包体但不创造足够价值 +3. **文本选择由平台能力承担** — PDFKit(iOS)、PdfRenderer(Android) +4. **PDF 搜索后置** — 待文本提取方案确定后再做 +5. **扫描 PDF / OCR 暂缓** — 明确不在第一版范围 + +--- + +## 方案对比 + +### 候选方案 + +| 方案 | 二进制大小 | 平台支持 | 文本提取 | 搜索 | 标注 | 成熟度 | +|------|----------|---------|---------|------|------|--------| +| **QuickLook (iOS/macOS)** | 0(系统内置) | Apple only | 否 | 否 | 否 | 高 | +| **PDFKit (iOS/macOS)** | 0(系统内置) | Apple only | 是 | 是 | 是 | 高 | +| **Android PdfRenderer** | 0(系统内置) | Android only | 部分 | 否 | 否 | 中 | +| **pdfium-render (Rust)** | ~15MB/平台 | 全平台 | 是 | 可自建 | 否 | 中 | +| **MuPDF (C)** | ~8MB/平台 | 全平台 | 是 | 可自建 | 部分 | 高 | + +### 评估维度 + +| 维度 | QuickLook (iOS) | PDFKit (iOS) | pdfium-render (Rust) | +|------|----------------|-------------|---------------------| +| 集成成本 | 极低(QLPreviewController) | 低(原生 API) | 高(交叉编译、binding) | +| 包体影响 | 0 | 0 | ~15MB × 平台数 | +| 页面渲染 | ✅ | ✅ | ✅(需要 bitmap pipeline) | +| 文本提取 | ❌ | ✅ | ✅ | +| 文本选择 | ❌(只能看不选) | ✅ | 需自建 UI | +| 搜索 | ❌ | ✅ | 可自建 | +| 阅读位置 | App 侧维护 | App 侧 + delegate | Rust 侧统一 | +| 统一数据模型 | ❌ | ❌ | ✅ | +| 跨平台复用 | ❌ | ❌ | ✅ | + +--- + +## 各平台策略 + +### iOS / macOS + +**当前(M3-M4):PDFKit** + +``` +MaterialReaderView + → PreviewMode.platformPreview + → QuickLook sheet (QLPreviewController) + → App 侧监听页码变化 + → 生成 ReadingPosition::Pdf { pageNumber, pageProgress, overallProgress } + → pushReadingEvent +``` + +优势: +- 零依赖,系统自带 +- 渲染质量高 +- 支持系统级文本选择、搜索 +- 用户熟悉的交互 + +局限: +- 
QuickLook 不暴露页码变化回调(需要 PDFKit 的 PDFView 才能精确跟踪) +- 无法提取文本传给 Rust 做统一搜索 +- App 侧需用 PDFView(监听 `PDFViewPageChanged` 通知)替代 QLPreviewController 以获得页码回调 + +**后续增强**:如果需要文本提取/搜索,把 QuickLook sheet 替换为 PDFView + PDFDocument,通过 `PDFDocument.string` 提取全文传给 Rust `search_text`。 + +### Android + +**当前策略:系统预览 / 外部 App** + +Android 有 `PdfRenderer`(API 21+),可渲染页面为 Bitmap。但第一版不做内置 PDF 阅读器。 + +``` +MaterialReaderView (Android) + → PreviewMode.platformPreview + → Intent.ACTION_VIEW + content:// URI + → 系统 PDF 阅读器 / Chrome + → 回到 App 后手动记录阅读时长 +``` + +后续可选:`PdfRenderer` + `RecyclerView` 自建阅读器,用 Rust `ReadingPosition::Pdf` 统一位置模型。 + +### 鸿蒙 / Windows / Web + +均优先走系统预览或浏览器内置 PDF 阅读器。Rust 只统一 `ReadingPosition::Pdf` 模型。 + +--- + +## 阅读位置模型 + +Rust 已定义统一的 `ReadingPosition::Pdf`: + +```rust +ReadingPosition::Pdf { + page_number: u32, // 1-based 页码 + page_progress: f32, // 0.0 ~ 1.0,该页内滚动比例 + overall_progress: f32, // 0.0 ~ 1.0,全书进度 +} +``` + +App 侧职责: +- iOS:监听 PDFView 的 `PDFViewPageChanged` 通知(NotificationCenter)→ 更新位置 +- Android:监听页面变化 → 更新位置 +- 所有平台:用同一套 `ReadingPosition::Pdf` 模型,不重复造轮子 + +--- + +## Rust 侧职责边界 + +### 当前(M3-M4) + +- `MaterialType::Pdf` — 文件类型识别 ✅ +- `ReadingPosition::Pdf` — 统一位置模型 ✅ +- `PreviewMode::PlatformPreview` — 预览模式映射 ✅ +- `pdf.rs` — 模块占位(注释说明走平台预览)✅ + +### 不做的 + +- 不集成 PDFium +- 不做 PDF 渲染(bitmap 生成) +- 不做 PDF 文本提取 +- 不做 PDF 标注 +- 不做 OCR + +### 后续评估(M5+) + +如果以下条件满足 **3 项以上**,重新评估 PDFium 集成: + +1. Android 需要内置 PDF 阅读器 +2. 搜索需要在 PDF 中定位 +3. 需要跨平台统一的文本提取 +4. 用户量大到平台差异成为维护负担 +5. 
PDFium 交叉编译经验积累充分 + +--- + +## 搜索策略 + +| 阶段 | 方案 | 能力 | +|------|------|------| +| M3-M4 | 不搜索 PDF | 用户在平台预览器中手动使用系统搜索 | +| M5+ | 平台提取 + Rust 搜索 | `PDFDocument.string`(PDFKit)/ PdfRenderer → 传给 Rust `search_text` | +| 远期 | PDFium 提取 + Tantivy | 全文索引,支持 PDF 内定位 | + +--- + +## 扫描 PDF / OCR + +**明确不进入知习范围**。理由: + +- OCR 是独立技术领域,与"阅读内核"定位不符 +- 高精度 OCR 需要专门模型(Tesseract / Apple Vision / ML Kit) +- 绝大多数学习资料是原生电子文档,非扫描件 +- 扫描件场景可由用户自行 OCR 后导入 + +--- + +## 后续路线图 + +``` +M3 ✅ — QuickLook sheet + ReadingPosition::Pdf +M4 ● — pdf-strategy.md(本文档) +M5 ○ — iOS 迁移至 PDFView(获得文本提取和页码回调) + — Rust 接收 PDF 全文做 search_text +M6+ ○ — 评估 PDFium(Android 自建阅读器需求驱动) + — 如集成 PDFium:文本提取 + bitmap 渲染 + 搜索定位 +``` + +--- + +## 验收确认 + +- [x] iOS 第一版继续使用 PDFKit / QuickLook +- [x] Rust 暂不接 PDFium +- [x] PDF 文本选择由平台能力承担 +- [x] PDF 搜索后置 +- [x] 扫描 PDF / OCR 暂缓 +- [x] 文档存在,方案决策明确,有后续路线图 diff --git a/fixtures/markdown/sample.md b/fixtures/markdown/sample.md new file mode 100644 index 0000000..4d7d763 --- /dev/null +++ b/fixtures/markdown/sample.md @@ -0,0 +1,29 @@ +# zhixi-document-runtime + +A cross-platform document reading kernel. + +## 核心原则 + +1. 原文件预览和 AI 解析彻底分离。 +2. Rust Core 不负责网络请求。 +3. Rust Core 不直接访问后端 API。 + +## 支持的格式 + +| 格式 | 预览方式 | 状态 | +| --- | --- | --- | +| Markdown | 内置阅读 | 已支持 | +| TXT | 内置阅读 | 已支持 | +| 图片 | 内置查看 | 已支持 | + +> 第一版目标不是"支持所有文件预览",而是先建立跨平台文档内核。 + +```rust +fn main() { + println!("Hello, zhixi!"); +} +``` + +--- + +*EOF* diff --git a/fixtures/text/sample.txt b/fixtures/text/sample.txt new file mode 100644 index 0000000..099c687 --- /dev/null +++ b/fixtures/text/sample.txt @@ -0,0 +1,7 @@ +zhixi-document-runtime + +A cross-platform document reading kernel built in Rust. + +Supported platforms: iOS, Android, HarmonyOS, macOS, Windows, Web. + +第一版先建立跨平台文档内核、阅读位置模型、学习事件协议和 iOS 接入链路。