diff --git a/core/src/ops/files/query/directory_listing.rs b/core/src/ops/files/query/directory_listing.rs index 5c50920161dd..104a69b9a622 100644 --- a/core/src/ops/files/query/directory_listing.rs +++ b/core/src/ops/files/query/directory_listing.rs @@ -12,6 +12,7 @@ use crate::{ video_media_data, }, infra::query::LibraryQuery, + ops::indexing::IndexScope, }; use sea_orm::{ ColumnTrait, ConnectionTrait, DatabaseConnection, EntityTrait, JoinType, QueryFilter, @@ -753,7 +754,7 @@ impl DirectoryListingQuery { // Get library to dispatch indexer job if let Some(library) = context.get_library(library_id).await { // Create cache entry and get the index to share with the job - let ephemeral_index = cache.create_for_indexing(local_path.clone()); + let ephemeral_index = cache.create_for_indexing(local_path.clone(), IndexScope::Current); // Clear any stale entries from previous indexing (prevents ghost files) let cleared = cache.clear_for_reindex(&local_path).await; @@ -790,7 +791,7 @@ impl DirectoryListingQuery { e ); // Mark indexing as not in progress since job failed - cache.mark_indexing_complete(&local_path); + cache.mark_indexing_complete(&local_path, IndexScope::Current); } } } diff --git a/core/src/ops/indexing/ephemeral/cache.rs b/core/src/ops/indexing/ephemeral/cache.rs index 5c10cd7774ca..5b2aefe8610e 100644 --- a/core/src/ops/indexing/ephemeral/cache.rs +++ b/core/src/ops/indexing/ephemeral/cache.rs @@ -7,9 +7,10 @@ //! (receiving live filesystem updates via `MemoryAdapter`). use super::EphemeralIndex; +use crate::ops::indexing::IndexScope; use parking_lot::RwLock; use std::{ - collections::HashSet, + collections::{HashMap, HashSet}, path::{Path, PathBuf}, sync::Arc, time::Instant, @@ -25,7 +26,8 @@ pub struct EphemeralIndexCache { index: Arc>, /// Paths whose immediate children have been indexed (ready for queries) - indexed_paths: RwLock>, + /// Maps path to its indexing scope (Current vs Recursive) + indexed_paths: RwLock>, /// Paths currently being indexed indexing_in_progress: RwLock>, @@ -42,7 +44,7 @@ impl EphemeralIndexCache { pub fn new() -> std::io::Result { Ok(Self { index: Arc::new(TokioRwLock::new(EphemeralIndex::new()?)), - indexed_paths: RwLock::new(HashSet::new()), + indexed_paths: RwLock::new(HashMap::new()), indexing_in_progress: RwLock::new(HashSet::new()), watched_paths: RwLock::new(HashSet::new()), created_at: Instant::now(), @@ -58,11 +60,38 @@ impl EphemeralIndexCache { /// For search, use `get_for_search()` which checks parent paths. pub fn get_for_path(&self, path: &Path) -> Option>> { let indexed = self.indexed_paths.read(); - if indexed.contains(path) { - Some(self.index.clone()) - } else { - None + + // Check exact match + if indexed.contains_key(path) { + return Some(self.index.clone()); + } + + // Check canonical form + let canonical = path.canonicalize().ok(); + if let Some(ref c) = canonical { + if indexed.contains_key(c.as_path()) { + return Some(self.index.clone()); + } } + + // Check if any recursively-indexed parent covers this path + for (indexed_path, scope) in indexed.iter() { + if *scope != IndexScope::Recursive { + continue; + } + + if path.starts_with(indexed_path) { + return Some(self.index.clone()); + } + + if let Some(ref c) = canonical { + if c.starts_with(indexed_path) { + return Some(self.index.clone()); + } + } + } + + None } /// Get the global index for searching within a path @@ -73,7 +102,7 @@ impl EphemeralIndexCache { let indexed = self.indexed_paths.read(); // First check for exact match - if indexed.contains(path) { + if indexed.contains_key(path) { return Some(self.index.clone()); } @@ -81,7 +110,7 @@ impl EphemeralIndexCache { let canonical_path = path.canonicalize().ok(); // Check if path or its canonical form is under any indexed parent - for indexed_path in indexed.iter() { + for (indexed_path, _scope) in indexed.iter() { // Try with original path if path.starts_with(indexed_path) { return Some(self.index.clone()); @@ -118,7 +147,39 @@ impl EphemeralIndexCache { /// Check if a path has been fully indexed pub fn is_indexed(&self, path: &Path) -> bool { - self.indexed_paths.read().contains(path) + let indexed = self.indexed_paths.read(); + + // Check exact match + if indexed.contains_key(path) { + return true; + } + + // Check canonical form + let canonical = path.canonicalize().ok(); + if let Some(ref c) = canonical { + if indexed.contains_key(c.as_path()) { + return true; + } + } + + // Check if any recursively-indexed parent covers this path + for (indexed_path, scope) in indexed.iter() { + if *scope != IndexScope::Recursive { + continue; + } + + if path.starts_with(indexed_path) { + return true; + } + + if let Some(ref c) = canonical { + if c.starts_with(indexed_path) { + return true; + } + } + } + + false } /// Check if indexing is in progress for a path @@ -146,9 +207,9 @@ impl EphemeralIndexCache { *index = loaded_index; drop(index); - // Mark as indexed - let mut indexed = self.indexed_paths.write(); - indexed.insert(path.to_path_buf()); + // Mark as indexed + let mut indexed = self.indexed_paths.write(); + indexed.insert(path.to_path_buf(), IndexScope::Recursive); tracing::info!("Loaded snapshot for path: {}", path.display()); return Ok(true); @@ -179,7 +240,39 @@ impl EphemeralIndexCache { /// /// If the path was previously indexed, clears its children first to /// prevent ghost entries from deleted files. - pub fn create_for_indexing(&self, path: PathBuf) -> Arc> { + /// + /// If the path is already covered by a recursive parent, returns the + /// existing index without registering (prevents redundant scans). + pub fn create_for_indexing( + &self, + path: PathBuf, + scope: IndexScope, + ) -> Arc> { + let indexed = self.indexed_paths.read(); + let canonical = path.canonicalize().ok(); + + // Check if already covered by a recursive parent + for (existing_path, existing_scope) in indexed.iter() { + if *existing_scope != IndexScope::Recursive { + continue; + } + + let covered = path.starts_with(existing_path) + || canonical + .as_ref() + .map_or(false, |c| c.starts_with(existing_path)); + + if covered { + tracing::debug!( + "Path {} already covered by recursive index at {}, skipping", + path.display(), + existing_path.display() + ); + return self.index.clone(); + } + } + drop(indexed); + let mut in_progress = self.indexing_in_progress.write(); let mut indexed = self.indexed_paths.write(); @@ -214,12 +307,19 @@ impl EphemeralIndexCache { /// Mark indexing as complete for a path /// /// Moves the path from "in progress" to "indexed" state. - pub fn mark_indexing_complete(&self, path: &Path) { + /// If scope is Recursive, subsumes child paths (removes redundant entries). + pub fn mark_indexing_complete(&self, path: &Path, scope: IndexScope) { let mut in_progress = self.indexing_in_progress.write(); let mut indexed = self.indexed_paths.write(); in_progress.remove(path); - indexed.insert(path.to_path_buf()); + + // If this is a recursive scan, subsume child paths + if scope == IndexScope::Recursive { + indexed.retain(|existing, _| !existing.starts_with(path) || existing == path); + } + + indexed.insert(path.to_path_buf(), scope); } /// Remove a path from the indexed set (e.g., on invalidation) @@ -243,7 +343,7 @@ impl EphemeralIndexCache { /// Get all indexed paths pub fn indexed_paths(&self) -> Vec { - self.indexed_paths.read().iter().cloned().collect() + self.indexed_paths.read().keys().cloned().collect() } /// Get all paths currently being indexed @@ -258,7 +358,7 @@ impl EphemeralIndexCache { /// must already be indexed. pub fn register_for_watching(&self, path: PathBuf) -> bool { let indexed = self.indexed_paths.read(); - if !indexed.contains(&path) { + if !indexed.contains_key(&path) { return false; } drop(indexed); @@ -385,7 +485,7 @@ impl EphemeralIndexCache { #[deprecated(note = "Entries should be added directly to the global index")] pub fn insert(&self, path: PathBuf, _index: Arc>) { let mut indexed = self.indexed_paths.write(); - indexed.insert(path); + indexed.insert(path, IndexScope::Recursive); } /// Legacy: Remove (just invalidates the path) @@ -443,12 +543,12 @@ mod tests { let path = PathBuf::from("/test/path"); // Start indexing - let _index = cache.create_for_indexing(path.clone()); + let _index = cache.create_for_indexing(path.clone(), IndexScope::Recursive); assert!(cache.is_indexing(&path)); assert!(!cache.is_indexed(&path)); // Complete indexing - cache.mark_indexing_complete(&path); + cache.mark_indexing_complete(&path, IndexScope::Recursive); assert!(!cache.is_indexing(&path)); assert!(cache.is_indexed(&path)); @@ -464,15 +564,15 @@ mod tests { let path2 = PathBuf::from("/test/path2"); // Start indexing both paths - let index1 = cache.create_for_indexing(path1.clone()); - let index2 = cache.create_for_indexing(path2.clone()); + let index1 = cache.create_for_indexing(path1.clone(), IndexScope::Current); + let index2 = cache.create_for_indexing(path2.clone(), IndexScope::Current); // They should be the same index assert!(Arc::ptr_eq(&index1, &index2)); // Complete both - cache.mark_indexing_complete(&path1); - cache.mark_indexing_complete(&path2); + cache.mark_indexing_complete(&path1, IndexScope::Current); + cache.mark_indexing_complete(&path2, IndexScope::Current); // Both paths now indexed assert!(cache.is_indexed(&path1)); @@ -486,8 +586,8 @@ mod tests { let path = PathBuf::from("/test/path"); // Index the path - let _index = cache.create_for_indexing(path.clone()); - cache.mark_indexing_complete(&path); + let _index = cache.create_for_indexing(path.clone(), IndexScope::Recursive); + cache.mark_indexing_complete(&path, IndexScope::Recursive); assert!(cache.is_indexed(&path)); // Invalidate it @@ -506,10 +606,10 @@ mod tests { let path2 = PathBuf::from("/in_progress"); // One indexed, one in progress - let _index = cache.create_for_indexing(path1.clone()); - cache.mark_indexing_complete(&path1); + let _index = cache.create_for_indexing(path1.clone(), IndexScope::Current); + cache.mark_indexing_complete(&path1, IndexScope::Current); - let _index = cache.create_for_indexing(path2.clone()); + let _index = cache.create_for_indexing(path2.clone(), IndexScope::Current); let stats = cache.stats(); assert_eq!(stats.indexed_paths, 1); @@ -526,8 +626,8 @@ mod tests { assert!(!cache.is_watched(&path)); // Index the path first - let _index = cache.create_for_indexing(path.clone()); - cache.mark_indexing_complete(&path); + let _index = cache.create_for_indexing(path.clone(), IndexScope::Recursive); + cache.mark_indexing_complete(&path, IndexScope::Recursive); // Now we can register for watching assert!(cache.register_for_watching(path.clone())); @@ -550,8 +650,8 @@ mod tests { let child = PathBuf::from("/mnt/nas/documents/report.pdf"); // Index and watch the root - let _index = cache.create_for_indexing(root.clone()); - cache.mark_indexing_complete(&root); + let _index = cache.create_for_indexing(root.clone(), IndexScope::Recursive); + cache.mark_indexing_complete(&root, IndexScope::Recursive); cache.register_for_watching(root.clone()); // Child path should find the watched root @@ -560,4 +660,93 @@ mod tests { // Unrelated path should not find a root assert_eq!(cache.find_watched_root(Path::new("/other/path")), None); } + + #[test] + fn test_parent_path_coverage() { + let cache = EphemeralIndexCache::new().expect("failed to create cache"); + + let root = PathBuf::from("/mnt/volume"); + let _index = cache.create_for_indexing(root.clone(), IndexScope::Recursive); + cache.mark_indexing_complete(&root, IndexScope::Recursive); + + // Child path should be considered indexed + assert!(cache.is_indexed(&PathBuf::from("/mnt/volume/photos/2024"))); + assert!(cache.get_for_path(&PathBuf::from("/mnt/volume/photos/2024")).is_some()); + } + + #[test] + fn test_shallow_browse_no_parent_coverage() { + let cache = EphemeralIndexCache::new().expect("failed to create cache"); + + // Shallow browse of root (Current scope) + let root = PathBuf::from("/mnt/volume"); + let _index = cache.create_for_indexing(root.clone(), IndexScope::Current); + cache.mark_indexing_complete(&root, IndexScope::Current); + + // Child path should NOT be covered by a shallow scan + assert!(!cache.is_indexed(&PathBuf::from("/mnt/volume/photos/2024"))); + } + + #[test] + fn test_no_redundant_scan_under_volume() { + let cache = EphemeralIndexCache::new().expect("failed to create cache"); + + let root = PathBuf::from("/mnt/volume"); + let _index = cache.create_for_indexing(root.clone(), IndexScope::Recursive); + cache.mark_indexing_complete(&root, IndexScope::Recursive); + + // Attempting to create_for_indexing on a child should be a no-op + let child = PathBuf::from("/mnt/volume/photos"); + let _index = cache.create_for_indexing(child.clone(), IndexScope::Current); + + // indexed_paths should still only contain the root + assert_eq!(cache.len(), 1); + } + + #[test] + fn test_volume_subsumes_child_paths() { + let cache = EphemeralIndexCache::new().expect("failed to create cache"); + + // Browse individual directories first + let dir1 = PathBuf::from("/mnt/volume/photos"); + let dir2 = PathBuf::from("/mnt/volume/documents"); + let _index = cache.create_for_indexing(dir1.clone(), IndexScope::Current); + cache.mark_indexing_complete(&dir1, IndexScope::Current); + let _index = cache.create_for_indexing(dir2.clone(), IndexScope::Current); + cache.mark_indexing_complete(&dir2, IndexScope::Current); + + assert_eq!(cache.len(), 2); + + // Now volume index the root (recursive) + let root = PathBuf::from("/mnt/volume"); + let _index = cache.create_for_indexing(root.clone(), IndexScope::Recursive); + cache.mark_indexing_complete(&root, IndexScope::Recursive); + + // Child paths subsumed — only root remains + assert_eq!(cache.len(), 1); + // Children still covered via parent + assert!(cache.is_indexed(&dir1)); + assert!(cache.is_indexed(&dir2)); + } + + #[test] + #[cfg(target_os = "macos")] + fn test_symlink_path_resolution() { + let cache = EphemeralIndexCache::new().expect("failed to create cache"); + + // Simulate volume index at real path + let real_root = PathBuf::from("/System/Volumes/Data"); + let _index = cache.create_for_indexing(real_root.clone(), IndexScope::Recursive); + cache.mark_indexing_complete(&real_root, IndexScope::Recursive); + + // Symlink path should be considered indexed (on macOS /Users -> /System/Volumes/Data/Users) + // This test verifies the canonicalization logic + let symlink_path = PathBuf::from("/Users/jamespine"); + // Only test if the path exists and canonicalizes to something under the indexed root + if let Ok(canonical) = symlink_path.canonicalize() { + if canonical.starts_with(&real_root) { + assert!(cache.is_indexed(&symlink_path)); + } + } + } } diff --git a/core/src/ops/indexing/ephemeral/index.rs b/core/src/ops/indexing/ephemeral/index.rs index ae2e2eb56f7e..ae53e3d1e3fb 100644 --- a/core/src/ops/indexing/ephemeral/index.rs +++ b/core/src/ops/indexing/ephemeral/index.rs @@ -123,6 +123,20 @@ impl EphemeralIndex { }) } + /// Resolve a path to its EntryId, handling symlinks. + /// + /// Tries direct lookup first (fast path), then canonicalizes and retries (slow path). + /// Canonicalization resolves symlinks so queries like `/Users/jamespine` find entries + /// stored as `/System/Volumes/Data/Users/jamespine` on macOS. + fn resolve_entry_id(&self, path: &Path) -> Option { + if let Some(&id) = self.path_index.get(path) { + return Some(id); + } + path.canonicalize() + .ok() + .and_then(|canonical| self.path_index.get(&canonical).copied()) + } + /// Ensures a directory exists, creating all missing ancestors recursively. /// /// This method guarantees that `list_directory()` works immediately after @@ -286,8 +300,8 @@ impl EphemeralIndex { } pub fn get_entry(&mut self, path: &PathBuf) -> Option { - let id = self.path_index.get(path)?; - let node = self.arena.get(*id)?; + let id = self.resolve_entry_id(path)?; + let node = self.arena.get(id)?; self.last_accessed = Instant::now(); @@ -310,8 +324,8 @@ impl EphemeralIndex { /// Get entry reference for read-only access (doesn't update last_accessed) pub fn get_entry_ref(&self, path: &PathBuf) -> Option { - let id = self.path_index.get(path)?; - let node = self.arena.get(*id)?; + let id = self.resolve_entry_id(path)?; + let node = self.arena.get(id)?; Some(EntryMetadata { path: path.clone(), @@ -331,8 +345,8 @@ impl EphemeralIndex { } pub fn get_entry_uuid(&self, path: &PathBuf) -> Option { - let entry_id = self.path_index.get(path)?; - self.entry_uuids.get(entry_id).copied() + let entry_id = self.resolve_entry_id(path)?; + self.entry_uuids.get(&entry_id).copied() } /// Get or assign a UUID for the given path (lazy generation). @@ -343,8 +357,8 @@ impl EphemeralIndex { /// to persistent indexes. pub fn get_or_assign_uuid(&mut self, path: &PathBuf) -> Uuid { // Look up EntryId for this path - let entry_id = match self.path_index.get(path) { - Some(&id) => id, + let entry_id = match self.resolve_entry_id(path) { + Some(id) => id, None => return Uuid::new_v4(), // Path not found, return random UUID }; @@ -376,8 +390,8 @@ impl EphemeralIndex { } pub fn get_content_kind(&self, path: &PathBuf) -> ContentKind { - let entry_id = match self.path_index.get(path) { - Some(&id) => id, + let entry_id = match self.resolve_entry_id(path) { + Some(id) => id, None => return ContentKind::Unknown, }; @@ -388,8 +402,8 @@ impl EphemeralIndex { } pub fn list_directory(&self, path: &Path) -> Option> { - let id = self.path_index.get(path)?; - let node = self.arena.get(*id)?; + let id = self.resolve_entry_id(path)?; + let node = self.arena.get(id)?; Some( node.children @@ -410,7 +424,7 @@ impl EphemeralIndex { pub fn clear_directory_children( &mut self, dir_path: &Path, - indexed_paths: &std::collections::HashSet, + indexed_paths: &std::collections::HashMap, ) -> (usize, Vec) { let dir_id = match self.path_index.get(dir_path) { Some(&id) => id, @@ -432,8 +446,8 @@ impl EphemeralIndex { let child_node = self.arena.get(child_id)?; let child_path = self.reconstruct_path(child_id)?; - // Preserve subdirectories that were explicitly browsed AND still exist - if child_node.is_directory() && indexed_paths.contains(&child_path) { + // Preserve subdirectories that were explicitly browsed AND still exist + if child_node.is_directory() && indexed_paths.contains_key(&child_path) { // Verify the directory still exists on the filesystem if std::fs::metadata(&child_path).is_ok() { return None; // Preserve - still exists and was browsed @@ -615,7 +629,7 @@ impl EphemeralIndex { /// Check if an entry exists at the given path. pub fn has_entry(&self, path: &Path) -> bool { - self.path_index.contains_key(path) + self.resolve_entry_id(path).is_some() } /// Remove an entry at the given path. diff --git a/core/src/ops/indexing/job.rs b/core/src/ops/indexing/job.rs index bc51df6d05c1..5a64a6a8bd0b 100644 --- a/core/src/ops/indexing/job.rs +++ b/core/src/ops/indexing/job.rs @@ -681,7 +681,7 @@ impl JobHandler for IndexerJob { ctx.library() .core_context() .ephemeral_cache() - .mark_indexing_complete(local_path); + .mark_indexing_complete(local_path, self.config.scope); match &result { Ok(_) => { ctx.log(format!( diff --git a/core/src/ops/volumes/index/action.rs b/core/src/ops/volumes/index/action.rs index d23f1804a289..a9d96f9838c4 100644 --- a/core/src/ops/volumes/index/action.rs +++ b/core/src/ops/volumes/index/action.rs @@ -94,7 +94,7 @@ impl LibraryAction for IndexVolumeAction { // 5. Get ephemeral cache and create/reuse index for this volume let ephemeral_cache = context.ephemeral_cache(); - let index = ephemeral_cache.create_for_indexing(volume.mount_point.clone()); + let index = ephemeral_cache.create_for_indexing(volume.mount_point.clone(), self.input.scope); indexer_job.set_ephemeral_index(index.clone()); // 6. Clear stale entries if this volume was previously indexed @@ -138,6 +138,7 @@ impl LibraryAction for IndexVolumeAction { let mount_point_clone = volume.mount_point.clone(); let volume_name = volume.name.clone(); let job_id_str = job_id.to_string(); + let scope_clone = self.input.scope; tokio::spawn(async move { let mut event_rx = context_clone.events.subscribe(); @@ -171,9 +172,9 @@ impl LibraryAction for IndexVolumeAction { error!("Failed to save volume stats: {}", e); } - // Mark as indexed and register for watching - let ephemeral_cache = context_clone.ephemeral_cache(); - ephemeral_cache.mark_indexing_complete(&mount_point_clone); + // Mark as indexed and register for watching + let ephemeral_cache = context_clone.ephemeral_cache(); + ephemeral_cache.mark_indexing_complete(&mount_point_clone, scope_clone); let _ = ephemeral_cache .register_for_watching(mount_point_clone.clone());