|
1 | | -import argparse |
2 | 1 | import glob |
| 2 | +import sys |
3 | 3 | from pathlib import Path |
| 4 | +from typing import Optional |
| 5 | +import typer |
4 | 6 | from loguru import logger |
5 | 7 | from iscc_sct.main import create |
6 | 8 | from charset_normalizer import from_bytes |
| 9 | +from iscc_sct import utils |
7 | 10 |
|
# Root Typer application; commands attach to it through the @app.command decorator.
app = typer.Typer(
    no_args_is_help=True,
    name="iscc-sct",
    help="Generate Semantic Text-Codes for text files and manage models.",
)
8 | 16 |
|
def _read_text(file_path: Path) -> Optional[str]:
    """Return the decoded content of *file_path*, or None if it cannot be decoded.

    UTF-8 is tried first. If that fails, the encoding is detected with
    charset_normalizer's ``from_bytes(...).best()``; when no match is found an
    error is logged and None is returned so the caller can skip the file.
    """
    data = file_path.read_bytes()
    try:
        return data.decode("utf-8")
    except UnicodeDecodeError:
        logger.debug(f"Could not decode {file_path.name} as UTF-8.")

    charset_match = from_bytes(data).best()
    if not charset_match:  # pragma: no cover
        logger.error(f"SKIPPING {file_path.name} - failed to detect text encoding")
        return None
    logger.debug(f"Decode {file_path.name} with {charset_match.encoding}.")
    return str(charset_match)


@app.command(name="gen")
def generate(
    path: Optional[str] = typer.Argument(
        None,
        help="Path to text files (supports glob patterns) or 'gui' to launch Gradio demo.",
    ),
    bits: int = typer.Option(256, "--bits", "-b", help="Bit-Length of Code (default 256)"),
    granular: bool = typer.Option(False, "--granular", "-g", help="Activate granular processing."),
    debug: bool = typer.Option(False, "--debug", "-d", help="Show debugging messages."),
):
    """Generate Semantic Text-Codes for text files.

    ``path`` is expanded with ``glob.glob``; every match that is a regular file
    is decoded, passed to ``create`` and its result printed (``repr`` of the
    result when ``granular`` is set, otherwise its ``iscc`` attribute). The
    special value ``gui`` launches the Gradio demo instead.

    Exits with status 1 when ``path`` is missing or when the GUI is requested
    but its import fails.
    """
    if path is None:
        # Diagnostics go to stderr so that piped stdout only ever contains codes.
        typer.echo("Error: Missing path argument", err=True)
        raise typer.Exit(1)

    if not debug:
        # Drop loguru's handlers so nothing is logged unless --debug is given.
        logger.remove()

    if path == "gui":  # pragma: no cover
        # Only the import is guarded: an ImportError raised while the demo is
        # running must not be misreported as "Gradio is not installed".
        try:
            from iscc_sct.demo import demo
        except ImportError:
            typer.echo(
                "Error: Gradio is not installed. Please install it with 'pip install gradio' to use the GUI.",
                err=True,
            )
            raise typer.Exit(1)
        demo.launch(inbrowser=True)
        return

    for match in glob.glob(path):
        file_path = Path(match)
        if not file_path.is_file():
            continue
        logger.debug(f"Processing {file_path.name}")

        text = _read_text(file_path)
        if text is None:
            continue
        # Checked after decoding so the charset-fallback path is covered too.
        if not text.strip():
            logger.warning(f"SKIPPED empty: {file_path}")
            continue

        sct_meta = create(text, granular=granular, bits=bits)
        typer.echo(repr(sct_meta) if granular else sct_meta.iscc)
| 73 | + |
| 74 | + |
@app.command(name="download-models")
def download_models():
    """Download all required models with progress reporting.

    Prints the model name, URL and destination taken from ``utils``, then calls
    ``utils.get_model()``. Any exception raised by that call is reported on
    stderr and the command exits with status 1.
    """
    typer.echo("Downloading models...")
    typer.echo(f"Model: {utils.MODEL_FILENAME}")
    typer.echo(f"URL: {utils.MODEL_URL}")
    typer.echo(f"Destination: {utils.MODEL_PATH}")

    # Keep the try body to the call that can actually fail, so a problem while
    # printing the success message is not reported as a download failure.
    try:
        model_path = utils.get_model()
    except Exception as e:
        # Broad catch is deliberate: this is the CLI boundary and every failure
        # should become a readable message plus a non-zero exit status.
        typer.secho(f"✗ Failed to download model: {e}", fg=typer.colors.RED, err=True)
        raise typer.Exit(1) from e

    typer.secho("✓ Model downloaded and verified successfully!", fg=typer.colors.GREEN)
    typer.echo(f"Location: {model_path}")
| 90 | + |
| 91 | + |
@app.command(name="verify-models")
def verify_models():
    """Verify model presence and integrity.

    Exits with status 1 when ``utils.MODEL_PATH`` does not exist or when
    ``utils.check_integrity`` raises ``RuntimeError`` for it. Error lines are
    written to stderr in both cases.
    """
    typer.echo("Verifying models...")
    typer.echo(f"Model: {utils.MODEL_FILENAME}")
    typer.echo(f"Expected location: {utils.MODEL_PATH}")

    if not utils.MODEL_PATH.exists():
        # Sent to stderr like the integrity failure below.
        typer.secho("✗ Model file not found!", fg=typer.colors.RED, err=True)
        typer.echo("Run 'iscc-sct download-models' to download the model.")
        raise typer.Exit(1)

    try:
        utils.check_integrity(utils.MODEL_PATH, utils.MODEL_CHECKSUM)
    except RuntimeError as e:
        typer.secho(f"✗ Model integrity check failed: {e}", fg=typer.colors.RED, err=True)
        typer.echo("Run 'iscc-sct download-models' to re-download the model.")
        raise typer.Exit(1) from e

    typer.secho("✓ Model integrity verified successfully!", fg=typer.colors.GREEN)
    typer.echo(f"Location: {utils.MODEL_PATH}")
| 112 | + |
| 113 | + |
@app.command(name="model-info")
def model_info():
    """Display model metadata (versions, paths, sizes).

    Prints the static values read from ``utils`` first, then the state of the
    local file: missing, or present with its size in MB and the outcome of
    ``utils.check_integrity``.
    """
    header = (
        "Model Information:",
        "=" * 50,
        f"Model Name: {utils.MODEL_FILENAME}",
        f"Model Version: {utils.BASE_VERSION}",
        f"Download URL: {utils.MODEL_URL}",
        f"Local Path: {utils.MODEL_PATH}",
        f"Checksum: {utils.MODEL_CHECKSUM}",
    )
    for row in header:
        typer.echo(row)

    # Guard clause: nothing more to inspect when the file is absent.
    if not utils.MODEL_PATH.exists():
        typer.secho("\n✗ Status: Not downloaded", fg=typer.colors.YELLOW)
        typer.echo(" Run 'iscc-sct download-models' to download the model.")
        return

    megabytes = utils.MODEL_PATH.stat().st_size / (1024 * 1024)
    typer.secho(f"\n✓ Status: Downloaded ({megabytes:.2f} MB)", fg=typer.colors.GREEN)
    try:
        utils.check_integrity(utils.MODEL_PATH, utils.MODEL_CHECKSUM)
        typer.secho("✓ Integrity: Verified", fg=typer.colors.GREEN)
    except RuntimeError:
        # A failed check is reported but does not change the exit status here.
        typer.secho("✗ Integrity: Failed", fg=typer.colors.RED)
        typer.echo(" Run 'iscc-sct download-models' to re-download the model.")
| 137 | + |
| 138 | + |
def main():
    """Main entry point that handles backward compatibility.

    Rewrites ``sys.argv`` in place before handing control to the Typer app:

    * ``<path> [options]`` (old style, no subcommand) becomes ``gen <path> [options]``.
    * ``[options] <path>`` (old style, options first) becomes ``gen [options] <path>``,
      unless the first option is one the app itself handles.
    * ``generate ...`` is accepted as an alias for the registered ``gen`` command.

    With no arguments the app is invoked unchanged (it is configured to show help).
    """
    subcommands = ("gen", "download-models", "verify-models", "model-info")
    # Options handled by the top-level app rather than by a subcommand.
    # NOTE(review): the completion flags exist only while Typer's default
    # add_completion setting is left enabled - confirm if that changes.
    app_options = ("--help", "--install-completion", "--show-completion")

    if len(sys.argv) > 1:
        first = sys.argv[1]
        if first == "generate":
            # Only "gen" is registered; map the long spelling onto it.
            sys.argv[1] = "gen"
        elif first.startswith("-"):
            if not first.startswith(app_options):
                sys.argv.insert(1, "gen")
        elif first not in subcommands:
            sys.argv.insert(1, "gen")

    app()
68 | 158 |
|
69 | 159 |
|
70 | 160 | if __name__ == "__main__": # pragma: no cover |
|
0 commit comments