Source code for spatialperturb.cli

"""Command-line interface for SpatialPerturb."""

from __future__ import annotations

from pathlib import Path

import anndata as ad
import typer

from . import __version__
from .benchmarks import available_benchmarks, run_core_benchmark, run_nature_methods_breast_analysis, run_reference_projection_benchmark
from .datasets import available_datasets, fetch_dataset, prepare_dataset
from .io import read_xenium
from .schema import schema_summary, validate_spatialperturb_schema

# Root Typer application; every command below registers itself on this object.
app = typer.Typer(
    help="SpatialPerturb CLI",
    add_completion=False,
    no_args_is_help=True,
)


@app.callback()
def main() -> None:
    """Command group for SpatialPerturb utilities."""
    # Intentionally empty: the callback only anchors the command group.
@app.command()
def version() -> None:
    """Print the installed package version."""
    # __version__ is imported from the package root at the top of the module.
    installed_version = __version__
    typer.echo(installed_version)
@app.command("datasets")
def list_datasets() -> None:
    """List built-in dataset cards."""
    dataset_cards = available_datasets()
    # Render without the positional index column.
    rendered = dataset_cards.to_string(index=False)
    typer.echo(rendered)
@app.command("benchmarks")
def list_benchmarks() -> None:
    """List benchmark tracks."""
    benchmark_tracks = available_benchmarks()
    # Render without the positional index column.
    rendered = benchmark_tracks.to_string(index=False)
    typer.echo(rendered)
@app.command("fetch-dataset")
def fetch_dataset_command(
    name: str,
    cache_dir: Path = Path(".spatialperturb-cache"),
    force: bool = False,
) -> None:
    """Download raw public files for a registered dataset."""
    outcome = fetch_dataset(name, cache_dir=cache_dir, force=force)
    # Emit one ``key=value`` line per reported field, in a fixed order.
    for field in ("dataset", "status", "raw_dir"):
        typer.echo(f"{field}={outcome[field]}")
@app.command("prepare-dataset")
def prepare_dataset_command(
    name: str,
    cache_dir: Path = Path(".spatialperturb-cache"),
    output_dir: Path | None = None,
) -> None:
    """Prepare a dataset into SpatialPerturb-compatible outputs."""
    outcome = prepare_dataset(name, cache_dir=cache_dir, output_dir=output_dir)
    typer.echo(f"dataset={outcome['dataset']}")
    typer.echo(f"status={outcome['status']}")
    # ``prepared_path`` is optional in the result; a missing key prints as None.
    typer.echo(f"prepared_path={outcome.get('prepared_path')}")
    # Only print the free-form note when one was provided and is non-empty.
    note = outcome.get("note")
    if note:
        typer.echo(note)
@app.command("prepare-xenium")
def prepare_xenium_command(
    path: Path,
    output_path: Path,
    cell_group_path: Path | None = None,
    roi_geojson_path: Path | None = None,
    sample_name: str | None = None,
    load_molecules: bool = False,
) -> None:
    """Prepare a Xenium directory into a schema-compliant h5ad file."""
    # Optional reader inputs are forwarded unchanged to ``read_xenium``.
    reader_options = {
        "cell_group_path": cell_group_path,
        "roi_geojson_path": roi_geojson_path,
        "sample_name": sample_name,
        "load_molecules": load_molecules,
    }
    adata = read_xenium(path, **reader_options)

    # Create the destination directory (and any parents) before writing.
    destination_dir = output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    adata.write_h5ad(output_path)

    typer.echo(f"prepared_path={output_path.resolve()}")
    typer.echo(f"n_obs={adata.n_obs}")
    typer.echo(f"n_vars={adata.n_vars}")
@app.command("run-benchmark")
def run_benchmark_command(
    dataset: str,
    cache_dir: Path = Path(".spatialperturb-cache"),
    output_dir: Path | None = None,
    reference_dataset: str | None = None,
    method: str = "auto",
) -> None:
    """Run the paper-grade benchmark workflow for a dataset."""
    settings: dict[str, object] = {"cache_dir": cache_dir}
    # Only forward overrides the user actually supplied; "auto" means
    # "do not pass a method key at all".
    if reference_dataset is not None:
        settings.update(reference_dataset=reference_dataset)
    if method != "auto":
        settings.update(method=method)

    # Default report location is ``reports/<dataset>`` relative to the CWD.
    report_root = output_dir if output_dir else Path("reports") / dataset
    results = run_core_benchmark(dataset, config=settings, output_dir=report_root)

    typer.echo(f"dataset={dataset}")
    typer.echo(f"report_dir={report_root.resolve()}")
    # Report only the result entries that expose a ``to_csv`` attribute.
    table_names = sorted(
        label for label, artifact in results.items() if hasattr(artifact, "to_csv")
    )
    typer.echo(f"tables={table_names}")
@app.command("render-paper-figures")
def render_paper_figures_command(
    dataset: str,
    cache_dir: Path = Path(".spatialperturb-cache"),
    output_dir: Path | None = None,
    reference_dataset: str | None = None,
) -> None:
    """Render the canonical paper figures for a dataset."""
    settings: dict[str, object] = {"cache_dir": cache_dir}
    if reference_dataset is not None:
        settings.update(reference_dataset=reference_dataset)

    # Default report location is ``reports/<dataset>`` relative to the CWD.
    report_root = output_dir if output_dir else Path("reports") / dataset
    # The return value is not used here; only the report location is echoed.
    run_core_benchmark(dataset, config=settings, output_dir=report_root)

    typer.echo(f"dataset={dataset}")
    figures_dir = report_root.resolve() / "figures"
    typer.echo(f"figures_dir={figures_dir}")
@app.command("run-reference-benchmark")
def run_reference_benchmark_command(
    spatial_input: Path,
    output_dir: Path,
    cache_dir: Path = Path(".spatialperturb-cache"),
    cell_group_path: Path | None = None,
    roi_geojson_path: Path | None = None,
    sample_name: str | None = None,
    reference_datasets: str = "gse241115_breast_cropseq,gse281048_pathway_atlas",
) -> None:
    """Run the Xenium + Perturb-seq reference projection workflow."""
    # ``reference_datasets`` is a comma-separated list; surrounding whitespace
    # and empty entries are discarded.
    datasets = [name.strip() for name in reference_datasets.split(",") if name.strip()]
    config: dict[str, object] = {
        "cache_dir": cache_dir,
        "cell_group_path": cell_group_path,
        "roi_geojson_path": roi_geojson_path,
        "sample_name": sample_name,
    }
    results = run_reference_projection_benchmark(
        spatial_input,
        reference_datasets=datasets,
        config=config,
        output_dir=output_dir,
    )
    # Fall back to the input path when the manifest is absent OR when it does
    # not carry a "dataset" entry (previously the latter raised KeyError).
    # NOTE(review): assumes ``results`` is dict-like with ``.get``, as the
    # sibling Nature Methods command does - confirm against the benchmark API.
    manifest = results.get("manifest", {})
    typer.echo(f"dataset={manifest.get('dataset', spatial_input)}")
    typer.echo(f"report_dir={output_dir.resolve()}")
    typer.echo(f"references={','.join(datasets)}")
@app.command("run-nature-methods-breast-analysis")
def run_nature_methods_breast_analysis_command(
    spatial_input: Path,
    output_dir: Path,
    cache_dir: Path = Path(".spatialperturb-cache"),
    cell_group_path: Path | None = None,
    roi_geojson_path: Path | None = None,
    sample_name: str | None = None,
    reference_datasets: str = "gse241115_breast_cropseq,gse281048_pathway_atlas",
    n_random: int = 25,
    n_spatial_permutations: int = 25,
    n_bootstrap: int = 100,
    min_claim_cells: int = 50,
) -> None:
    """Run the publication-grade Nature Methods breast short-communication workflow."""
    # Split the comma-separated list, trimming whitespace and dropping blanks.
    datasets = [
        token for token in map(str.strip, reference_datasets.split(",")) if token
    ]

    input_settings: dict[str, object] = {
        "cache_dir": cache_dir,
        "cell_group_path": cell_group_path,
        "roi_geojson_path": roi_geojson_path,
        "sample_name": sample_name,
    }
    resampling_settings: dict[str, object] = {
        "n_random": n_random,
        # The label-shuffle count reuses the ``n_random`` value.
        "n_label_shuffles": n_random,
        "n_spatial_permutations": n_spatial_permutations,
        "n_bootstrap": n_bootstrap,
        "min_claim_cells": min_claim_cells,
        "reference_effect_size_only": True,
    }
    config: dict[str, object] = {**input_settings, **resampling_settings}

    results = run_nature_methods_breast_analysis(
        spatial_input,
        reference_datasets=datasets,
        config=config,
        output_dir=output_dir,
    )

    # Every manifest lookup falls back to a locally known value.
    manifest = results.get("manifest", {})
    typer.echo(f"dataset={manifest.get('dataset', spatial_input)}")
    typer.echo(f"report_dir={output_dir.resolve()}")
    reported_references = ",".join(
        str(ref) for ref in manifest.get("reference_datasets", datasets)
    )
    typer.echo(f"references={reported_references}")
    claim_rows = manifest.get("summary", {}).get("claim_level_rows", "NA")
    typer.echo(f"claim_level_rows={claim_rows}")
@app.command("validate")
def validate(path: Path) -> None:
    """Validate that an h5ad file follows the SpatialPerturb schema."""
    adata = ad.read_h5ad(path)
    # Validation runs before the summary is built or printed.
    validate_spatialperturb_schema(adata)

    summary = schema_summary(adata)
    # Turn the labelled summary into a two-column metric/value table.
    summary_table = summary.rename_axis("metric").reset_index(name="value")
    typer.echo(summary_table.to_string(index=False))
if __name__ == "__main__":
    # Invoke the Typer application only when this module is executed directly.
    app()