2929 sample_farthest_points ,
3030 sample_farthest_points_randomized ,
3131)
32+ from ..utils .denoise .Sanity_py import sample_posterior_predictive_counts
3233from ..utils .distance_metrics .frechet_py import compute_pairwise_loop_frechet
3334from ..utils .linear_algebra_gf2 import ( # type: ignore
3435 solve_multiple_gf2_m4ri , # type: ignore[import-not-found]
3839 from ..data .containers import BoundaryMatrixD1
3940
4041
def _sample_bootstrap_embedding(
    adata: AnnData,
    meta: ScloopMeta,
    selected_indices: list[int],
    sample_idx: np.ndarray,
    bootstrap_noise_model: str,
    noise_scale: float,
) -> tuple[np.ndarray, list[int]]:
    """Build one bootstrap replicate of the embedding for the sampled cells.

    Parameters
    ----------
    adata
        Object whose ``obsm`` mapping holds the embedding under the key
        ``"X_<embedding_method>"``.
    meta
        Metadata; ``meta.preprocess`` and ``meta.preprocess.embedding_method``
        must both be set.
    selected_indices
        Row indices (into the embedding) of the cells under consideration.
    sample_idx
        Positions *into* ``selected_indices`` chosen by the bootstrap sampler.
    bootstrap_noise_model
        ``"sanity"`` delegates to ``sample_posterior_predictive_counts``, but
        only when the embedding method is ``"pca"``.  Every other value (and
        ``"sanity"`` with a non-PCA embedding) falls back to additive Gaussian
        jitter.
    noise_scale
        Multiplier applied to the per-dimension standard deviation of the
        selected rows to obtain the Gaussian noise scale.

    Returns
    -------
    tuple[np.ndarray, list[int]]
        The perturbed coordinates of the sampled cells and the corresponding
        row indices (``selected_indices`` mapped through ``sample_idx``).

    Raises
    ------
    ValueError
        If ``meta.preprocess`` or its ``embedding_method`` is ``None``.
    """
    # Explicit checks instead of ``assert`` so they survive ``python -O``.
    preprocess = meta.preprocess
    if preprocess is None:
        raise ValueError("meta.preprocess must be set before bootstrapping")
    embedding_method = preprocess.embedding_method
    if embedding_method is None:
        raise ValueError(
            "meta.preprocess.embedding_method must be set before bootstrapping"
        )

    # Translate sampler positions into row indices of the full embedding.
    boot_idx = [selected_indices[int(i)] for i in sample_idx.tolist()]

    if bootstrap_noise_model == "sanity" and embedding_method == "pca":
        # NOTE(review): presumably returns coordinates in PCA space for the
        # requested cells -- confirm against sample_posterior_predictive_counts.
        X = sample_posterior_predictive_counts(
            adata=adata,
            cell_idx=np.asarray(boot_idx, dtype=np.int64),
            scale_before_pca=preprocess.scale_before_pca,
            n_pca_comps=preprocess.n_pca_comps,
        )
        return X, boot_idx

    # Gaussian path: the stored embedding is only needed here, so it is
    # fetched lazily rather than before the branch.
    emb = np.asarray(adata.obsm[f"X_{embedding_method}"])
    # Noise scale is derived from all selected rows, not just the resampled
    # ones, so it does not depend on the particular bootstrap draw.
    std_X = np.std(emb[selected_indices], axis=0)
    X = emb[boot_idx]
    X = X + np.random.normal(scale=std_X * noise_scale, size=X.shape)
    return X, boot_idx
70+
71+
4172def compute_sparse_pairwise_distance (
4273 adata : AnnData ,
4374 meta : ScloopMeta ,
4475 bootstrap : bool = False ,
4576 noise_scale : float = 1e-3 ,
77+ bootstrap_noise_model : str = "gaussian" ,
4678 thresh : Diameter_t | None = None ,
4779 bootstrap_sampling : str = "resample" ,
4880 bootstrap_downsample_fraction : Percent_t = 2 / 3 ,
@@ -68,22 +100,28 @@ def compute_sparse_pairwise_distance(
68100 if bootstrap_sampling == "resample" :
69101 sample_idx = np .random .choice (
70102 len (selected_indices ), size = len (selected_indices ), replace = True
71- ).tolist ()
72- boot_idx = [selected_indices [i ] for i in sample_idx ]
73- std_X = np .std (X , axis = 0 )
74- X = X [sample_idx ] + np .random .normal (
75- scale = std_X * noise_scale , size = X .shape
103+ )
104+ X , boot_idx = _sample_bootstrap_embedding (
105+ adata = adata ,
106+ meta = meta ,
107+ selected_indices = selected_indices ,
108+ sample_idx = np .asarray (sample_idx , dtype = np .int64 ),
109+ bootstrap_noise_model = bootstrap_noise_model ,
110+ noise_scale = noise_scale ,
76111 )
77112 elif bootstrap_sampling == "fps" :
78113 n_keep = max (
79114 2 , int (round (len (selected_indices ) * bootstrap_downsample_fraction ))
80115 )
81116 n_keep = min (n_keep , len (selected_indices ))
82117 sample_idx = sample_farthest_points (X , n_keep )
83- boot_idx = [selected_indices [int (i )] for i in sample_idx .tolist ()]
84- std_X = np .std (X , axis = 0 )
85- X = X [sample_idx ] + np .random .normal (
86- scale = std_X * noise_scale , size = (n_keep , X .shape [1 ])
118+ X , boot_idx = _sample_bootstrap_embedding (
119+ adata = adata ,
120+ meta = meta ,
121+ selected_indices = selected_indices ,
122+ sample_idx = np .asarray (sample_idx , dtype = np .int64 ),
123+ bootstrap_noise_model = bootstrap_noise_model ,
124+ noise_scale = noise_scale ,
87125 )
88126 elif bootstrap_sampling == "fps_random" :
89127 if bootstrap_fps_top_k <= 0 :
@@ -97,10 +135,13 @@ def compute_sparse_pairwise_distance(
97135 sample_idx = sample_farthest_points_randomized (
98136 X , n_keep , top_k = bootstrap_fps_top_k , alpha = bootstrap_fps_alpha
99137 )
100- boot_idx = [selected_indices [int (i )] for i in sample_idx .tolist ()]
101- std_X = np .std (X , axis = 0 )
102- X = X [sample_idx ] + np .random .normal (
103- scale = std_X * noise_scale , size = (n_keep , X .shape [1 ])
138+ X , boot_idx = _sample_bootstrap_embedding (
139+ adata = adata ,
140+ meta = meta ,
141+ selected_indices = selected_indices ,
142+ sample_idx = np .asarray (sample_idx , dtype = np .int64 ),
143+ bootstrap_noise_model = bootstrap_noise_model ,
144+ noise_scale = noise_scale ,
104145 )
105146 elif (
106147 bootstrap_sampling == "herding"
@@ -118,10 +159,13 @@ def compute_sparse_pairwise_distance(
118159 frequency_seed = bootstrap_herding_seed ,
119160 n_features = int (bootstrap_herding_n_features ),
120161 )
121- boot_idx = [selected_indices [int (i )] for i in sample_idx .tolist ()]
122- std_X = np .std (X , axis = 0 )
123- X = X [sample_idx ] + np .random .normal (
124- scale = std_X * noise_scale , size = (n_keep , X .shape [1 ])
162+ X , boot_idx = _sample_bootstrap_embedding (
163+ adata = adata ,
164+ meta = meta ,
165+ selected_indices = selected_indices ,
166+ sample_idx = np .asarray (sample_idx , dtype = np .int64 ),
167+ bootstrap_noise_model = bootstrap_noise_model ,
168+ noise_scale = noise_scale ,
125169 )
126170 else :
127171 boot_idx = selected_indices
0 commit comments