Skip to content

Commit 3be4e89

Browse files
StarbirdTech and claude committed
Merge proper speech-to-text feature gating from PR #3006
Resolves conflicts by taking the new feature-gated implementation, which:
- Uses #[cfg(feature = "speech-to-text")] instead of #[cfg(all(feature = "ffmpeg", feature = "whisper"))]
- Properly integrates with the ffmpeg feature hierarchy

Co-Authored-By: Claude Opus 4.5 <[email protected]>
2 parents 0657f4c + 74d3b82 commit 3be4e89

File tree

13 files changed

+49
-41
lines changed

13 files changed

+49
-41
lines changed

.github/workflows/ci.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ concurrency:
2424
jobs:
2525
rustfmt:
2626
name: Rust Formatting
27-
runs-on: ubuntu-22.04
27+
runs-on: blacksmith-4vcpu-ubuntu-2204
2828
timeout-minutes: 10
2929
permissions:
3030
contents: read
@@ -94,7 +94,7 @@ jobs:
9494
matrix:
9595
settings:
9696
- host: ubuntu-22.04
97-
target: x86_64-unknown-linux-gnu
97+
target: blacksmith-4vcpu-ubuntu-2404
9898
name: Clippy (${{ matrix.settings.host }})
9999
runs-on: ${{ matrix.settings.host }}
100100
permissions:
@@ -164,7 +164,7 @@ jobs:
164164

165165
typescript:
166166
name: TypeScript
167-
runs-on: ubuntu-22.04
167+
runs-on: blacksmith-4vcpu-ubuntu-2204
168168
timeout-minutes: 15
169169
permissions:
170170
contents: read

.github/workflows/core_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ jobs:
2222
target: aarch64-apple-darwin
2323
os: macos
2424
- host: ubuntu-22.04
25-
target: x86_64-unknown-linux-gnu
25+
target: blacksmith-4vcpu-ubuntu-2404
2626
os: linux
2727
- host: [self-hosted, Windows, X64]
2828
target: x86_64-pc-windows-msvc

.github/workflows/release.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ jobs:
6565

6666
# - name: Build CLI binaries
6767
# run: |
68-
# cargo build --release --bin sd-cli --bin sd-daemon --features heif,ffmpeg --target ${{ matrix.target }}
68+
# cargo build --release --bin sd-cli --bin sd-daemon --features heif,ffmpeg,ai --target ${{ matrix.target }}
6969
# env:
7070
# # Set linker for cross-compilation
7171
# CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER: aarch64-linux-gnu-gcc
@@ -111,7 +111,7 @@ jobs:
111111
target: x86_64-unknown-linux-gnu
112112
platform: linux-x86_64
113113
- host: ubuntu-22.04
114-
target: aarch64-unknown-linux-gnu
114+
target: blacksmith-4vcpu-ubuntu-2404-arm
115115
platform: linux-aarch64
116116
name: Server - ${{ matrix.settings.platform }}
117117
runs-on: ${{ matrix.settings.host }}
@@ -144,7 +144,7 @@ jobs:
144144

145145
- name: Build server binary
146146
run: |
147-
cargo build --release --bin sd-server --features sd-core/heif,sd-core/ffmpeg --target ${{ matrix.settings.target }}
147+
cargo build --release --bin sd-server --features sd-core/heif,sd-core/ffmpeg,sd-core/ai --target ${{ matrix.settings.target }}
148148
env:
149149
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER: aarch64-linux-gnu-gcc
150150

@@ -194,7 +194,7 @@ jobs:
194194
# arch: x86_64
195195
# Linux builds
196196
- host: ubuntu-22.04
197-
target: x86_64-unknown-linux-gnu
197+
target: blacksmith-4vcpu-ubuntu-2404
198198
bundles: deb
199199
os: linux
200200
arch: x86_64
@@ -309,7 +309,7 @@ jobs:
309309
# Create unified release with Server, CLI, and Desktop artifacts
310310
release:
311311
if: startsWith(github.ref, 'refs/tags/')
312-
runs-on: self-hosted
312+
runs-on: blacksmith-4vcpu-ubuntu-2404
313313
name: Create Release
314314
needs: [server-build, desktop-main]
315315
permissions:

.github/workflows/server.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ on:
1313
jobs:
1414
build-server:
1515
name: Build a docker image for spacedrive server
16-
runs-on: ubuntu-latest
16+
runs-on: blacksmith-4vcpu-ubuntu-2404
1717
defaults:
1818
run:
1919
shell: bash

apps/cli/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ version = "2.0.0-pre.1"
77
default = []
88
heif = ["sd-core/heif"]
99
ffmpeg = ["sd-core/ffmpeg"]
10+
whisper = ["sd-core/whisper"]
11+
speech-to-text = ["sd-core/speech-to-text"]
12+
ai = ["sd-core/ai"]
1013

1114
[dependencies]
1215
anyhow = "1"

apps/server/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ edition = "2021"
77
default = []
88
heif = ["sd-core/heif"]
99
ffmpeg = ["sd-core/ffmpeg"]
10+
whisper = ["sd-core/whisper"]
11+
speech-to-text = ["sd-core/speech-to-text"]
12+
ai = ["sd-core/ai"]
1013

1114
[dependencies]
1215
# Spacedrive core

core/Cargo.toml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,14 @@ autobins = true
66

77
[features]
88
default = ["wasm"]
9-
# FFmpeg support for video thumbnails and audio transcription
9+
# FFmpeg support for video thumbnails and audio extraction
1010
ffmpeg = ["dep:sd-ffmpeg"]
11-
# AI models support
12-
ai = []
11+
# Whisper speech recognition engine (internal dependency)
12+
whisper = ["dep:whisper-rs", "dep:hound", "dep:rubato"]
13+
# Speech-to-text transcription (requires audio extraction + recognition)
14+
speech-to-text = ["ffmpeg", "whisper"]
15+
# AI features umbrella (heavy deps, can be disabled for lite builds or mobile)
16+
ai = ["speech-to-text"]
1317
# HEIF image support (extends sd-images with HEIF format)
1418
heif = ["sd-images/heif"]
1519
# Mobile platform support (excludes wasm which doesn't work on iOS)
@@ -18,8 +22,6 @@ mobile = []
1822
cli = []
1923
# WASM plugin system (disabled on mobile)
2024
wasm = ["dep:wasmer", "dep:wasmer-middlewares"]
21-
# Whisper speech-to-text support (disabled on Android due to BLAS cross-compilation issues)
22-
whisper = ["dep:whisper-rs", "dep:hound", "dep:rubato"]
2325

2426

2527
[dependencies]
@@ -126,10 +128,10 @@ sd-media-metadata = { path = "../crates/media-metadata" }
126128
tokio-rustls = "0.26"
127129
webp = "0.3"
128130

129-
# Speech-to-text dependencies (optional - disabled on Android due to cross-compilation issues)
131+
# Speech-to-text dependencies (optional, behind whisper feature)
130132
whisper-rs = { version = "0.15.1", optional = true }
131-
hound = { version = "3.5", optional = true } # WAV file reading
132-
rubato = { version = "0.16", optional = true } # Audio resampling to 16kHz
133+
hound = { version = "3.5", optional = true } # WAV file reading
134+
rubato = { version = "0.16", optional = true } # Audio resampling to 16kHz
133135

134136
# Networking
135137
# Iroh P2P networking

core/src/domain/location.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ impl Default for SpeechPolicy {
539539

540540
impl SpeechPolicy {
541541
/// Convert this policy to a SpeechToTextJobConfig for job dispatch
542-
#[cfg(all(feature = "ffmpeg", feature = "whisper"))]
542+
#[cfg(feature = "speech-to-text")]
543543
pub fn to_job_config(
544544
&self,
545545
location_id: Option<Uuid>,

core/src/ops/indexing/change_detection/persistent.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -405,11 +405,13 @@ impl ChangeHandler for DatabaseAdapter {
405405
use crate::ops::indexing::processor::{
406406
load_location_processor_config, ContentHashProcessor, ProcessorEntry,
407407
};
408-
#[cfg(all(feature = "ffmpeg", feature = "whisper"))]
409-
use crate::ops::media::speech::SpeechToTextProcessor;
410408
use crate::ops::media::{ocr::OcrProcessor, proxy::ProxyProcessor};
411409
#[cfg(feature = "ffmpeg")]
412-
use crate::ops::media::{thumbnail::ThumbnailProcessor, thumbstrip::ThumbstripProcessor};
410+
use crate::ops::media::{
411+
thumbnail::ThumbnailProcessor, thumbstrip::ThumbstripProcessor,
412+
};
413+
#[cfg(feature = "speech-to-text")]
414+
use crate::ops::media::speech::SpeechToTextProcessor;
413415

414416
if entry.is_directory() {
415417
return Ok(());
@@ -583,7 +585,7 @@ impl ChangeHandler for DatabaseAdapter {
583585
}
584586

585587
// Speech-to-text
586-
#[cfg(all(feature = "ffmpeg", feature = "whisper"))]
588+
#[cfg(feature = "speech-to-text")]
587589
if proc_config
588590
.watcher_processors
589591
.iter()

core/src/ops/locations/trigger_job/action.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ impl LibraryAction for LocationTriggerJobAction {
168168
})?
169169
}
170170

171-
#[cfg(all(feature = "ffmpeg", feature = "whisper"))]
171+
#[cfg(feature = "speech-to-text")]
172172
JobType::SpeechToText => {
173173
if !job_policies.speech_to_text.enabled && !self.input.force {
174174
return Err(ActionError::Validation {
@@ -198,13 +198,11 @@ impl LibraryAction for LocationTriggerJobAction {
198198
});
199199
}
200200

201-
#[cfg(not(all(feature = "ffmpeg", feature = "whisper")))]
201+
#[cfg(not(feature = "speech-to-text"))]
202202
JobType::SpeechToText => {
203203
return Err(ActionError::Validation {
204204
field: "job_type".to_string(),
205-
message:
206-
"Speech-to-text requires FFmpeg and Whisper support which is not enabled"
207-
.to_string(),
205+
message: "Speech-to-text requires FFmpeg and Whisper support which is not enabled".to_string(),
208206
});
209207
}
210208

0 commit comments

Comments
 (0)