|
17 | 17 | TimeElapsedColumn, |
18 | 18 | TimeRemainingColumn, |
19 | 19 | ) |
| 20 | +from scipy.spatial.distance import pdist |
20 | 21 |
|
21 | 22 | from ..data.constants import ( |
| 23 | + DEFAULT_AUTO_THRESHOLD_FACTOR, |
22 | 24 | DEFAULT_K_NEIGHBORS_CHECK_EQUIVALENCE, |
23 | 25 | DEFAULT_MAX_COLUMNS_BOUNDARY_MATRIX, |
24 | 26 | DEFAULT_MAXITER_EIGENDECOMPOSITION, |
@@ -103,6 +105,39 @@ def find_loops( |
103 | 105 | meta.bootstrap = BootstrapMeta() |
104 | 106 | meta.bootstrap.life_pct = tightness_loops |
105 | 107 | hd: HomologyData = HomologyData(meta=meta) |
| 108 | + """ |
| 109 | + ============ Auto-choose PH threshold ============ |
| 110 | + - upper bound would be the max pw dist |
| 111 | + - choose a value such that all 1-loop dies |
| 112 | + ================================================== |
| 113 | + """ |
| 114 | + if threshold_homology is None: |
| 115 | + assert meta.preprocess is not None |
| 116 | + assert meta.preprocess.embedding_method is not None |
| 117 | + emb = adata.obsm[f"X_{meta.preprocess.embedding_method}"] |
| 118 | + selected_indices = ( |
| 119 | + meta.preprocess.indices_downsample |
| 120 | + if meta.preprocess.indices_downsample is not None |
| 121 | + else list(range(emb.shape[0])) |
| 122 | + ) |
| 123 | + max_pw_dist = float(pdist(emb[selected_indices]).max()) |
| 124 | + if verbose: |
| 125 | + logger.info( |
| 126 | + f"Auto-threshold: max pairwise distance = {max_pw_dist:.4f}" |
| 127 | + ) |
| 128 | + hd._compute_homology(adata=adata, thresh=max_pw_dist) |
| 129 | + # need some room for bootstrap, loops will die later for fewer points |
| 130 | + auto_factor = (kwargs_bootstrap or {}).get( |
| 131 | + "auto_threshold_factor", DEFAULT_AUTO_THRESHOLD_FACTOR |
| 132 | + ) |
| 133 | + max_h1_death = float(np.max(hd.persistence_diagram[1][1])) |
| 134 | + threshold_homology = max_h1_death * auto_factor |
| 135 | + if verbose: |
| 136 | + logger.info( |
| 137 | + f"Auto-threshold: max H1 death = {max_h1_death:.4f}, " |
| 138 | + f"using threshold = {threshold_homology:.4f} (factor={auto_factor})" |
| 139 | + ) |
| 140 | + |
106 | 141 | sparse_dist_mat = hd._compute_homology(adata=adata, thresh=threshold_homology) |
107 | 142 | boundary_thresh = threshold_boundary |
108 | 143 | if boundary_thresh is None: |
|
0 commit comments