Skip to content

Commit 4cfa792

Browse files
committed
Add CLI commands for model management (fixes #20)
- Add three new CLI commands using typer: - `sct download-models`: Pre-download ML models with progress - `sct verify-models`: Verify model presence and integrity - `sct model-info`: Display model metadata and status - Migrate CLI from argparse to typer for better command structure - Maintain backward compatibility with existing CLI usage - Add typer as a new dependency - Update README with new command documentation - Update CHANGELOG for v0.1.5
1 parent 0b872ac commit 4cfa792

5 files changed

Lines changed: 195 additions & 62 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changelog
22

3+
## [0.1.5] - Unreleased
4+
5+
- Add CLI commands for model management (`download-models`, `verify-models`, `model-info`)
6+
- Migrate CLI from argparse to typer for better command structure
7+
- Add typer as a dependency
8+
- Maintain backward compatibility with existing CLI usage
9+
310
## [0.1.4] - 2025-04-24
411

512
- Added `bytes_offsets` option to generate UTF-8 byte positions instead of character positions

README.md

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ pip install iscc-sct
2626
python -c "import iscc_sct as sct; print(sct.create('Your text here').iscc)"
2727

2828
# Or use the CLI
29+
sct gen "path/to/textfile.txt"
30+
31+
# Backward-compatible (old-style command still works)
2932
sct "path/to/textfile.txt"
3033
```
3134

@@ -172,20 +175,53 @@ print(f"Hamming distance in bits: {distance}")
172175
The installation also provides an `sct` command-line tool:
173176

174177
```shell
175-
usage: sct [-h] [-b BITS] [-g] [-d] [path]
178+
usage: sct [OPTIONS] COMMAND [ARGS]...
179+
180+
Generate Semantic Text-Codes for text files and manage models.
181+
182+
Commands:
183+
gen Generate Semantic Text-Codes for text files.
184+
download-models Download all required models with progress reporting.
185+
verify-models Verify model presence and integrity.
186+
model-info Display model metadata (versions, paths, sizes).
187+
188+
Options:
189+
--help Show this message and exit.
190+
```
176191

177-
Generate Semantic Text-Codes for text files.
192+
### CLI Commands
178193

179-
positional arguments:
180-
path Path to text files (supports glob patterns) or 'gui' to launch Gradio demo.
194+
#### Generate Semantic Text-Codes
181195

182-
options:
183-
-h, --help show this help message and exit
184-
-b BITS, --bits BITS Bit-Length of Code (default 256)
196+
```shell
197+
sct gen [OPTIONS] [PATH]
198+
199+
Options:
200+
-b, --bits INTEGER Bit-Length of Code (default 256)
185201
-g, --granular Activate granular processing.
186202
-d, --debug Show debugging messages.
187203
```
188204

205+
#### Model Management
206+
207+
Pre-download models (useful for containers and CI/CD):
208+
209+
```shell
210+
sct download-models
211+
```
212+
213+
Verify model integrity:
214+
215+
```shell
216+
sct verify-models
217+
```
218+
219+
Display model information:
220+
221+
```shell
222+
sct model-info
223+
```
224+
189225
## How It Works
190226

191227
```
@@ -218,7 +254,8 @@ See iscc_sct/options.py for more configuration settings.
218254

219255
## Performance Considerations
220256

221-
- The embedding model will be downloaded on first execution
257+
- The embedding model will be downloaded automatically on first execution
258+
- You can pre-download models using `sct download-models` (useful for containers and CI/CD pipelines)
222259
- **CPU vs GPU**: On systems with CUDA-compatible GPUs, install with `pip install iscc-sct[gpu]` for
223260
significantly faster processing.
224261

iscc_sct/cli.py

Lines changed: 126 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,70 +1,160 @@
1-
import argparse
21
import glob
2+
import sys
33
from pathlib import Path
4+
from typing import Optional
5+
import typer
46
from loguru import logger
57
from iscc_sct.main import create
68
from charset_normalizer import from_bytes
9+
from iscc_sct import utils
710

11+
# Root Typer application. Every ``@app.command`` below registers one subcommand on it.
# ``no_args_is_help=True`` asks Typer to print the help text when no arguments are given.
app = typer.Typer(
    name="iscc-sct",
    help="Generate Semantic Text-Codes for text files and manage models.",
    no_args_is_help=True,
)
816

9-
def main():
10-
parser = argparse.ArgumentParser(description="Generate Semantic Text-Codes for text files.")
11-
parser.add_argument(
12-
"path",
13-
type=str,
17+
18+
def _decode_text(data, name):
    """Decode raw file bytes, preferring UTF-8 and falling back to charset detection.

    Returns the decoded text, or ``None`` when no encoding could be detected.
    ``name`` is only used for log messages.
    """
    try:
        return data.decode("utf-8")
    except UnicodeDecodeError:
        logger.debug(f"Could not decode {name} as UTF-8.")
    charset_match = from_bytes(data).best()
    if not charset_match:  # pragma: no cover
        logger.error(f"SKIPPING {name} - failed to detect text encoding")
        return None
    logger.debug(f"Decode {name} with {charset_match.encoding}.")
    return str(charset_match)


@app.command(name="gen")
def generate(
    path: Optional[str] = typer.Argument(
        None,
        help="Path to text files (supports glob patterns) or 'gui' to launch Gradio demo.",
    ),
    bits: int = typer.Option(256, "--bits", "-b", help="Bit-Length of Code (default 256)"),
    granular: bool = typer.Option(False, "--granular", "-g", help="Activate granular processing."),
    debug: bool = typer.Option(False, "--debug", "-d", help="Show debugging messages."),
):
    """Generate Semantic Text-Codes for text files."""
    # NOTE: Typer renders the docstring as this command's help text, so it is kept to a
    # single line; implementation notes live in comments instead.
    if path is None:
        # Diagnostics go to stderr so stdout only ever carries generated codes.
        typer.echo("Error: Missing path argument", err=True)
        raise typer.Exit(1)

    if not debug:
        # Drop loguru's handlers so no log output is emitted unless --debug is given.
        logger.remove()

    if path == "gui":  # pragma: no cover
        # Only the import is guarded: an ImportError raised while launching the demo
        # must not be misreported as "Gradio is not installed".
        try:
            from iscc_sct.demo import demo
        except ImportError:
            typer.echo(
                "Error: Gradio is not installed. Please install it with 'pip install gradio' to use the GUI.",
                err=True,
            )
            raise typer.Exit(1)
        demo.launch(inbrowser=True)
        return

    for match in glob.glob(path):
        file_path = Path(match)
        if not file_path.is_file():
            continue
        logger.debug(f"Processing {file_path.name}")
        text = _decode_text(file_path.read_bytes(), file_path.name)
        if text is None:
            continue
        # Checked after decoding so whitespace-only files are skipped for every encoding,
        # not just for UTF-8 input.
        if not text.strip():
            logger.warning(f"SKIPPED empty: {file_path}")
            continue
        sct_meta = create(text, granular=granular, bits=bits)
        typer.echo(repr(sct_meta) if granular else sct_meta.iscc)
73+
74+
75+
@app.command(name="download-models")
def download_models():
    """Download all required models with progress reporting."""
    # The docstring doubles as the CLI help text, so it is reproduced unchanged.
    banner = (
        "Downloading models...",
        f"Model: {utils.MODEL_FILENAME}",
        f"URL: {utils.MODEL_URL}",
        f"Destination: {utils.MODEL_PATH}",
    )
    for line in banner:
        typer.echo(line)

    green, red = typer.colors.GREEN, typer.colors.RED
    try:
        location = utils.get_model()
        typer.secho("✓ Model downloaded and verified successfully!", fg=green)
        typer.echo(f"Location: {location}")
    except Exception as exc:
        # Top-level CLI boundary: report any failure on stderr and exit non-zero.
        typer.secho(f"✗ Failed to download model: {exc}", fg=red, err=True)
        raise typer.Exit(1)
90+
91+
92+
@app.command(name="verify-models")
def verify_models():
    """Verify model presence and integrity."""
    # The docstring doubles as the CLI help text, so it is kept to a single line.
    # Exits with status 1 when the model file is missing or fails the integrity check.
    typer.echo("Verifying models...")
    typer.echo(f"Model: {utils.MODEL_FILENAME}")
    typer.echo(f"Expected location: {utils.MODEL_PATH}")

    if not utils.MODEL_PATH.exists():
        # Error line goes to stderr, matching the integrity-failure branch below.
        typer.secho("✗ Model file not found!", fg=typer.colors.RED, err=True)
        # The installed executable is `sct` (see README), not `iscc-sct`.
        typer.echo("Run 'sct download-models' to download the model.")
        raise typer.Exit(1)

    # Only the check itself is guarded; a RuntimeError is treated as a failed check.
    try:
        utils.check_integrity(utils.MODEL_PATH, utils.MODEL_CHECKSUM)
    except RuntimeError as e:
        typer.secho(f"✗ Model integrity check failed: {e}", fg=typer.colors.RED, err=True)
        typer.echo("Run 'sct download-models' to re-download the model.")
        raise typer.Exit(1)

    typer.secho("✓ Model integrity verified successfully!", fg=typer.colors.GREEN)
    typer.echo(f"Location: {utils.MODEL_PATH}")
112+
113+
114+
@app.command(name="model-info")
def model_info():
    """Display model metadata (versions, paths, sizes)."""
    # The docstring doubles as the CLI help text, so it is kept to a single line.
    typer.echo("Model Information:")
    typer.echo("=" * 50)
    typer.echo(f"Model Name: {utils.MODEL_FILENAME}")
    typer.echo(f"Model Version: {utils.BASE_VERSION}")
    typer.echo(f"Download URL: {utils.MODEL_URL}")
    typer.echo(f"Local Path: {utils.MODEL_PATH}")
    typer.echo(f"Checksum: {utils.MODEL_CHECKSUM}")

    if not utils.MODEL_PATH.exists():
        typer.secho("\n✗ Status: Not downloaded", fg=typer.colors.YELLOW)
        # The installed executable is `sct` (see README), not `iscc-sct`.
        typer.echo("  Run 'sct download-models' to download the model.")
        return

    size_mb = utils.MODEL_PATH.stat().st_size / (1024 * 1024)
    typer.secho(f"\n✓ Status: Downloaded ({size_mb:.2f} MB)", fg=typer.colors.GREEN)

    # Only the check itself is guarded; a RuntimeError is treated as a failed check.
    try:
        utils.check_integrity(utils.MODEL_PATH, utils.MODEL_CHECKSUM)
    except RuntimeError:
        typer.secho("✗ Integrity: Failed", fg=typer.colors.RED)
        typer.echo("  Run 'sct download-models' to re-download the model.")
    else:
        typer.secho("✓ Integrity: Verified", fg=typer.colors.GREEN)
137+
138+
139+
# Subcommands registered on ``app`` via ``@app.command``.
_SUBCOMMANDS = frozenset({"gen", "download-models", "verify-models", "model-info"})

# Options consumed by the top-level app itself. The completion flags are the ones Typer
# adds by default — NOTE(review): confirm if ``add_completion`` is ever disabled.
_APP_OPTIONS = frozenset({"--help", "--install-completion", "--show-completion"})


def _route_legacy_args(args):
    """Return ``args`` rewritten so that old-style invocations reach the ``gen`` command.

    ``args`` is the argument list without the program name. The input list is not modified.
    """
    if not args:
        # Bare invocation: leave untouched so the app prints its help.
        return list(args)
    first, rest = args[0], list(args[1:])
    if first == "-h":
        # The argparse-based CLI accepted -h; the Typer app only knows --help.
        return ["--help", *rest]
    if first == "generate":
        # Long spelling of the command, accepted as an alias for the registered "gen".
        return ["gen", *rest]
    if first in _SUBCOMMANDS or first in _APP_OPTIONS:
        return [first, *rest]
    # Anything else is old-style usage, e.g. `sct <path> [options]` or
    # `sct -b 128 <path>`; convert to `sct gen ...`.
    return ["gen", first, *rest]


def main():
    """Main entry point that handles backward compatibility."""
    # Rewrite sys.argv in place (program name preserved) and let Typer parse it as usual.
    sys.argv[1:] = _route_legacy_args(sys.argv[1:])
    app()
68158

69159

70160
if __name__ == "__main__": # pragma: no cover

0 commit comments

Comments
 (0)