@@ -73,7 +73,6 @@ def xenium(
7373 cells_table : bool = True ,
7474 n_jobs : int = 1 ,
7575 gex_only : bool = True ,
76- cleanup_labels_zarr_tmpdir : bool = True ,
7776 imread_kwargs : Mapping [str , Any ] = MappingProxyType ({}),
7877 image_models_kwargs : Mapping [str , Any ] = MappingProxyType ({}),
7978 labels_models_kwargs : Mapping [str , Any ] = MappingProxyType ({}),
@@ -202,86 +201,62 @@ def xenium(
202201 else :
203202 table = None
204203
205- tmpdir = tempfile .TemporaryDirectory ()
206- if not cleanup_labels_zarr_tmpdir :
207- logging .info (
208- f"Extracting cells zarr in the temporary directory { tmpdir .name } . Since `cleanup_labels_zarr_tmpdir` "
209- f"is set to `False`, this directory cleanup will be deferred (up to the end of the process). "
210- f"If the process is interrupted aburptly cleanup may not occurr. Use with care to avoid uncleaned up "
211- f"temporary directories."
204+ if version is not None and version >= packaging .version .parse ("2.0.0" ) and table is not None :
205+ cell_summary_table = _get_cells_metadata_table_from_zarr (path , XeniumKeys .CELLS_ZARR , specs )
206+ if not cell_summary_table [XeniumKeys .CELL_ID ].equals (table .obs [XeniumKeys .CELL_ID ]):
207+ warnings .warn (
208+ 'The "cell_id" column in the cells metadata table does not match the "cell_id" column in the annotation'
209+ " table. This could be due to trying to read a new version that is not supported yet. Please "
210+ "report this issue." ,
211+ UserWarning ,
212+ stacklevel = 2 ,
213+ )
214+ table .obs [XeniumKeys .Z_LEVEL ] = cell_summary_table [XeniumKeys .Z_LEVEL ]
215+ table .obs [XeniumKeys .NUCLEUS_COUNT ] = cell_summary_table [XeniumKeys .NUCLEUS_COUNT ]
216+
217+ polygons = {}
218+ labels = {}
219+ tables = {}
220+ points = {}
221+ images = {}
222+
223+ # From the public release notes here:
224+ # https://www.10xgenomics.com/support/software/xenium-onboard-analysis/latest/release-notes/release-notes-for-xoa
225+ # we see that for distinguishing between the nuclei of polynucleated cells, the `label_id` column is used.
226+ # This column is currently not found in the preview data, while I think it is needed in order to unambiguously match
227+ # nuclei to cells. Therefore for the moment we only link the table to the cell labels, and not to the nucleus
228+ # labels.
229+ if nucleus_labels :
230+ labels ["nucleus_labels" ], _ = _get_labels_and_indices_mapping (
231+ path ,
232+ XeniumKeys .CELLS_ZARR ,
233+ specs ,
234+ mask_index = 0 ,
235+ labels_name = "nucleus_labels" ,
236+ labels_models_kwargs = labels_models_kwargs ,
212237 )
213- zip_file = path / XeniumKeys .CELLS_ZARR
214- with zipfile .ZipFile (zip_file , "r" ) as zip_ref :
215- zip_ref .extractall (tmpdir .name )
216- try :
217- cells_zarr = zarr .open (str (tmpdir .name ), mode = "r" )
218- if version is not None and version >= packaging .version .parse ("2.0.0" ) and table is not None :
219- cell_summary_table = _get_cells_metadata_table_from_zarr (cells_zarr = cells_zarr )
220- if not cell_summary_table [XeniumKeys .CELL_ID ].equals (table .obs [XeniumKeys .CELL_ID ]):
238+ if cells_labels :
239+ labels ["cell_labels" ], cell_labels_indices_mapping = _get_labels_and_indices_mapping (
240+ path ,
241+ XeniumKeys .CELLS_ZARR ,
242+ specs ,
243+ mask_index = 1 ,
244+ labels_name = "cell_labels" ,
245+ labels_models_kwargs = labels_models_kwargs ,
246+ )
247+ if cell_labels_indices_mapping is not None and table is not None :
248+ if not pd .DataFrame .equals (cell_labels_indices_mapping ["cell_id" ], table .obs [str (XeniumKeys .CELL_ID )]):
221249 warnings .warn (
222- ' The " cell_id" column in the cells metadata table does not match the " cell_id" column in the annotation'
223- " table . This could be due to trying to read a new version that is not supported yet. Please "
224- "report this issue." ,
250+ " The cell_id column in the cell_labels_table does not match the cell_id column derived from the "
251+ "cell labels data . This could be due to trying to read a new version that is not supported yet. "
252+ "Please report this issue." ,
225253 UserWarning ,
226254 stacklevel = 2 ,
227255 )
228- table .obs [XeniumKeys .Z_LEVEL ] = cell_summary_table [XeniumKeys .Z_LEVEL ]
229- table .obs [XeniumKeys .NUCLEUS_COUNT ] = cell_summary_table [XeniumKeys .NUCLEUS_COUNT ]
230-
231- polygons = {}
232- labels = {}
233- tables = {}
234- points = {}
235- images = {}
236-
237- # From the public release notes here:
238- # https://www.10xgenomics.com/support/software/xenium-onboard-analysis/latest/release-notes/release-notes-for-xoa
239- # we see that for distinguishing between the nuclei of polinucleated cells, the `label_id` column is used.
240- # This column is currently not found in the preview data, while I think it is needed in order to unambiguously match
241- # nuclei to cells. Therefore for the moment we only link the table to the cell labels, and not to the nucleus
242- # labels.
243-
244- if nucleus_labels :
245- labels ["nucleus_labels" ], _ = _get_labels_and_indices_mapping (
246- cells_zarr ,
247- cleanup_labels_zarr_tmpdir ,
248- specs ,
249- mask_index = 0 ,
250- labels_name = "nucleus_labels" ,
251- labels_models_kwargs = labels_models_kwargs ,
252- )
253- if cells_labels :
254- labels ["cell_labels" ], cell_labels_indices_mapping = _get_labels_and_indices_mapping (
255- cells_zarr ,
256- cleanup_labels_zarr_tmpdir ,
257- specs ,
258- mask_index = 1 ,
259- labels_name = "cell_labels" ,
260- labels_models_kwargs = labels_models_kwargs ,
261- )
262- if cell_labels_indices_mapping is not None and table is not None :
263- if not pd .DataFrame .equals (
264- cell_labels_indices_mapping ["cell_id" ],
265- table .obs [str (XeniumKeys .CELL_ID )],
266- ):
267- warnings .warn (
268- "The cell_id column in the cell_labels_table does not match the cell_id column derived from the "
269- "cell labels data. This could be due to trying to read a new version that is not supported yet. "
270- "Please report this issue." ,
271- UserWarning ,
272- stacklevel = 2 ,
273- )
274- else :
275- table .obs ["cell_labels" ] = cell_labels_indices_mapping ["label_index" ]
276- if not cells_as_circles :
277- table .uns [TableModel .ATTRS_KEY ][TableModel .INSTANCE_KEY ] = "cell_labels"
278- except Exception as e :
279- tmpdir .cleanup ()
280- raise e
281-
282- # we cleanup now if we don't have lazy data
283- if not cells_labels and not nucleus_labels or cleanup_labels_zarr_tmpdir :
284- tmpdir .cleanup ()
256+ else :
257+ table .obs ["cell_labels" ] = cell_labels_indices_mapping ["label_index" ]
258+ if not cells_as_circles :
259+ table .uns [TableModel .ATTRS_KEY ][TableModel .INSTANCE_KEY ] = "cell_labels"
285260
286261 if nucleus_boundaries :
287262 polygons ["nucleus_boundaries" ] = _get_polygons (
@@ -480,8 +455,8 @@ def _poly(arr: ArrayLike) -> Polygon:
480455
481456
482457def _get_labels_and_indices_mapping (
483- cells_zarr : zarr . Group ,
484- cleanup_labels_zarr_tmpdir : bool ,
458+ path : Path ,
459+ file : str ,
485460 specs : dict [str , Any ],
486461 mask_index : int ,
487462 labels_name : str ,
@@ -490,17 +465,12 @@ def _get_labels_and_indices_mapping(
490465 if mask_index not in [0 , 1 ]:
491466 raise ValueError (f"mask_index must be 0 or 1, found { mask_index } ." )
492467
468+ zip_file = path / XeniumKeys .CELLS_ZARR
469+ store = zarr .storage .ZipStore (zip_file , read_only = True )
470+ z = zarr .open (store , mode = "r" )
493471 # get the labels
494- if cleanup_labels_zarr_tmpdir :
495- masks = cells_zarr ["masks" ][f"{ mask_index } " ][...]
496- else :
497- masks = da .from_array (cells_zarr ["masks" ][f"{ mask_index } " ])
498- labels = Labels2DModel .parse (
499- masks ,
500- dims = ("y" , "x" ),
501- transformations = {"global" : Identity ()},
502- ** labels_models_kwargs ,
503- )
472+ masks = da .from_array (z ["masks" ][f"{ mask_index } " ])
473+ labels = Labels2DModel .parse (masks , dims = ("y" , "x" ), transformations = {"global" : Identity ()}, ** labels_models_kwargs )
504474
505475 # build the matching table
506476 version = _parse_version_of_xenium_analyzer (specs )
@@ -512,7 +482,7 @@ def _get_labels_and_indices_mapping(
512482 # supported in versions < 1.3.0
513483 return labels , None
514484
515- cell_id , dataset_suffix = cells_zarr ["cell_id" ][...].T
485+ cell_id , dataset_suffix = z ["cell_id" ][...].T
516486 cell_id_str = cell_id_str_from_prefix_suffix_uint32 (cell_id , dataset_suffix )
517487
518488 # this information will probably be available in the `label_id` column for version > 2.0.0 (see public
@@ -524,7 +494,7 @@ def _get_labels_and_indices_mapping(
524494 real_label_index = real_label_index [1 :]
525495
526496 if version < packaging .version .parse ("2.0.0" ):
527- expected_label_index = cells_zarr ["seg_mask_value" ][...]
497+ expected_label_index = z ["seg_mask_value" ][...]
528498
529499 if not np .array_equal (expected_label_index , real_label_index ):
530500 raise ValueError (
@@ -533,7 +503,7 @@ def _get_labels_and_indices_mapping(
533503 f"{ expected_label_index } ."
534504 )
535505 else :
536- labels_positional_indices = cells_zarr ["polygon_sets" ][f"{ mask_index } " ]["cell_index" ][...]
506+ labels_positional_indices = z ["polygon_sets" ][f"{ mask_index } " ]["cell_index" ][...]
537507 if not np .array_equal (labels_positional_indices , np .arange (len (labels_positional_indices ))):
538508 raise ValueError (
539509 "The positional indices of the labels do not match the expected range. Please report this issue."
@@ -554,19 +524,26 @@ def _get_labels_and_indices_mapping(
554524
555525@inject_docs (xx = XeniumKeys )
556526def _get_cells_metadata_table_from_zarr (
557- cells_zarr : zarr .Group ,
527+ path : Path ,
528+ file : str ,
529+ specs : dict [str , Any ],
558530) -> AnnData :
559531 """Read cells metadata from ``{xx.CELLS_ZARR}``.
560532
561533 Read the cells summary table, which contains the z_level information for versions < 2.0.0, and also the
562534 nucleus_count for versions >= 2.0.0.
563535 """
564536 # for version >= 2.0.0, in this function we could also parse the segmentation method used to obtain the masks
565- x = cells_zarr ["cell_summary" ][...]
566- column_names = cells_zarr ["cell_summary" ].attrs ["column_names" ]
537+ zip_file = path / XeniumKeys .CELLS_ZARR
538+ store = zarr .storage .ZipStore (zip_file , read_only = True )
539+
540+ z = zarr .open (store , mode = "r" )
541+ x = z ["cell_summary" ][...]
542+ column_names = z ["cell_summary" ].attrs ["column_names" ]
567543 df = pd .DataFrame (x , columns = column_names )
568- cell_id_prefix = cells_zarr ["cell_id" ][:, 0 ]
569- dataset_suffix = cells_zarr ["cell_id" ][:, 1 ]
544+ cell_id_prefix = z ["cell_id" ][:, 0 ]
545+ dataset_suffix = z ["cell_id" ][:, 1 ]
546+ store .close ()
570547
571548 cell_id_str = cell_id_str_from_prefix_suffix_uint32 (cell_id_prefix , dataset_suffix )
572549 df [XeniumKeys .CELL_ID ] = cell_id_str
0 commit comments