Skip to content

Commit 7eff09f

Browse files
committed
update: auto-choose threshold. Fixes #70
1 parent 473a8f9 commit 7eff09f

3 files changed

Lines changed: 39 additions & 4 deletions

File tree

src/scloop/computing/homology.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -435,9 +435,7 @@ def compute_loop_homological_equivalence(
435435
# replace last columns (already sorted by diameters) with identity columns
436436
n_extra_edges = min(len(n_hubs_edges), max_n_edges_relaxation)
437437
if n_extra_edges > 0:
438-
one_ridx_A[: 3 * n_extra_edges :] = np.repeat(
439-
n_hubs_edges[:n_extra_edges], 3
440-
)
438+
one_ridx_A[: 3 * n_extra_edges] = np.repeat(n_hubs_edges[:n_extra_edges], 3)
441439
results_relax, solutions_relax = solve_multiple_gf2_m4ri(
442440
one_ridx_A=one_ridx_A.tolist(),
443441
one_cidx_A=one_cidx_A.tolist(),

src/scloop/data/constants.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
DEFAULT_MAX_COLUMNS_BOUNDARY_MATRIX: int = 10000
3030
DEFAULT_N_BOOTSTRAP: int = 10
3131

32+
DEFAULT_AUTO_THRESHOLD_FACTOR: float = 1.25
33+
3234
DEFAULT_NOISE_SCALE: float = 1e-6
3335
DEFAULT_N_REPS_PER_LOOP: int = 4
3436
DEFAULT_N_COCYCLES_USED: int = 3
@@ -39,7 +41,7 @@
3941
DEFAULT_K_NEIGHBORS_CHECK_EQUIVALENCE: int = 1
4042
DEFAULT_EXTRA_DIAM_EQUIVALENCE: float = 1.0
4143
DEFAULT_WITH_RELAXATION_EQUIVALENCE: bool = True
42-
DEFAULT_N_HUBS_RELAXATION_EQUIVALENCE: int = 2
44+
DEFAULT_N_HUBS_RELAXATION_EQUIVALENCE: int = 3
4345
DEFAULT_MAX_N_EDGES_RELAXATION_EQUIVALENCE: int = 500
4446

4547
DEFAULT_WEIGHT_HODGE: float = 0.5

src/scloop/tools/_loops.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
TimeElapsedColumn,
1818
TimeRemainingColumn,
1919
)
20+
from scipy.spatial.distance import pdist
2021

2122
from ..data.constants import (
23+
DEFAULT_AUTO_THRESHOLD_FACTOR,
2224
DEFAULT_K_NEIGHBORS_CHECK_EQUIVALENCE,
2325
DEFAULT_MAX_COLUMNS_BOUNDARY_MATRIX,
2426
DEFAULT_MAXITER_EIGENDECOMPOSITION,
@@ -103,6 +105,39 @@ def find_loops(
103105
meta.bootstrap = BootstrapMeta()
104106
meta.bootstrap.life_pct = tightness_loops
105107
hd: HomologyData = HomologyData(meta=meta)
108+
"""
109+
============ Auto-choose PH threshold ============
110+
- the upper bound is the maximum pairwise distance
111+
- choose a value such that all 1-loops die
112+
==================================================
113+
"""
114+
if threshold_homology is None:
115+
assert meta.preprocess is not None
116+
assert meta.preprocess.embedding_method is not None
117+
emb = adata.obsm[f"X_{meta.preprocess.embedding_method}"]
118+
selected_indices = (
119+
meta.preprocess.indices_downsample
120+
if meta.preprocess.indices_downsample is not None
121+
else list(range(emb.shape[0]))
122+
)
123+
max_pw_dist = float(pdist(emb[selected_indices]).max())
124+
if verbose:
125+
logger.info(
126+
f"Auto-threshold: max pairwise distance = {max_pw_dist:.4f}"
127+
)
128+
hd._compute_homology(adata=adata, thresh=max_pw_dist)
129+
# leave some headroom for bootstrap: with fewer points, loops will die later
130+
auto_factor = (kwargs_bootstrap or {}).get(
131+
"auto_threshold_factor", DEFAULT_AUTO_THRESHOLD_FACTOR
132+
)
133+
max_h1_death = float(np.max(hd.persistence_diagram[1][1]))
134+
threshold_homology = max_h1_death * auto_factor
135+
if verbose:
136+
logger.info(
137+
f"Auto-threshold: max H1 death = {max_h1_death:.4f}, "
138+
f"using threshold = {threshold_homology:.4f} (factor={auto_factor})"
139+
)
140+
106141
sparse_dist_mat = hd._compute_homology(adata=adata, thresh=threshold_homology)
107142
boundary_thresh = threshold_boundary
108143
if boundary_thresh is None:

0 commit comments

Comments
 (0)