Skip to content

Commit e644a65

Browse files
committed
Merge remote-tracking branch 'origin/main' into spanorm
2 parents 5298977 + ac0c0c0 commit e644a65

File tree

34 files changed

+1275
-291
lines changed

34 files changed

+1275
-291
lines changed

_viash.yaml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,3 @@ repositories:
7272
type: github
7373
repo: openproblems-bio/openproblems
7474
tag: build/main
75-
- name: core
76-
type: github
77-
repo: openproblems-bio/core
78-
tag: build/main
79-
path: viash/core

scripts/create_resources/process_datasets.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,30 @@ param_list:
1717
- id: "mouse_brain_combined/rep1"
1818
input_sp: "$input_dir/10x_xenium/2023_10x_mouse_brain_xenium/rep1/dataset.zarr"
1919
input_sc: "$input_dir/allen_brain_cell_atlas/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad"
20+
dataset_name: "Mouse brain combined 2023 tenx Xenium replicate 1 2023 Yao scRNAseq"
21+
dataset_url: "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard;https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE246717"
22+
dataset_reference: "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard;10.1038/s41586-023-06812-z"
23+
dataset_summary: "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1);A high-resolution scRNAseq atlas of cell types in the whole mouse brain"
24+
dataset_description: "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1). Replicate results demonstrate the high reproducibility of data generated by the platform. 10x Genomics obtained tissue from a C57BL/6 mouse from Charles River Laboratories. Three adjacent 10µm sections were placed on the same slide. Tissues were prepared following the demonstrated protocols Xenium In Situ for Fresh Frozen Tissues - Tissue Preparation Guide (CG000579) and Xenium In Situ for Fresh Frozen Tissues - Fixation & Permeabilization (CG000581).;See dataset_reference for more information. Note that we only took the 10xv2 data from the dataset."
25+
dataset_organism: "mus_musculus"
2026
- id: "mouse_brain_combined/rep2"
2127
input_sp: "$input_dir/10x_xenium/2023_10x_mouse_brain_xenium/rep2/dataset.zarr"
2228
input_sc: "$input_dir/allen_brain_cell_atlas/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad"
29+
dataset_name: "Mouse brain combined 2023 tenx Xenium replicate 2 2023 Yao scRNAseq"
30+
dataset_url: "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard;https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE246717"
31+
dataset_reference: "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard;10.1038/s41586-023-06812-z"
32+
dataset_summary: "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1);A high-resolution scRNAseq atlas of cell types in the whole mouse brain"
33+
dataset_description: "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1). Replicate results demonstrate the high reproducibility of data generated by the platform. 10x Genomics obtained tissue from a C57BL/6 mouse from Charles River Laboratories. Three adjacent 10µm sections were placed on the same slide. Tissues were prepared following the demonstrated protocols Xenium In Situ for Fresh Frozen Tissues - Tissue Preparation Guide (CG000579) and Xenium In Situ for Fresh Frozen Tissues - Fixation & Permeabilization (CG000581).;See dataset_reference for more information. Note that we only took the 10xv2 data from the dataset."
34+
dataset_organism: "mus_musculus"
2335
- id: "mouse_brain_combined/rep3"
2436
input_sp: "$input_dir/10x_xenium/2023_10x_mouse_brain_xenium/rep3/dataset.zarr"
2537
input_sc: "$input_dir/allen_brain_cell_atlas/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad"
38+
dataset_name: "Mouse brain combined 2023 tenx Xenium replicate 3 2023 Yao scRNAseq"
39+
dataset_url: "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard;https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE246717"
40+
dataset_reference: "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard;10.1038/s41586-023-06812-z"
41+
dataset_summary: "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1);A high-resolution scRNAseq atlas of cell types in the whole mouse brain"
42+
dataset_description: "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1). Replicate results demonstrate the high reproducibility of data generated by the platform. 10x Genomics obtained tissue from a C57BL/6 mouse from Charles River Laboratories. Three adjacent 10µm sections were placed on the same slide. Tissues were prepared following the demonstrated protocols Xenium In Situ for Fresh Frozen Tissues - Tissue Preparation Guide (CG000579) and Xenium In Situ for Fresh Frozen Tissues - Fixation & Permeabilization (CG000581).;See dataset_reference for more information. Note that we only took the 10xv2 data from the dataset."
43+
dataset_organism: "mus_musculus"
2644
2745
output_sc: "\$id/output_sc.h5ad"
2846
output_sp: "\$id/output_sp.zarr"

scripts/create_resources/process_vizgen_merscope.sh

Lines changed: 17 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,7 @@ cat > /tmp/params.yaml << HERE
1414
param_list:
1515
1616
- id: "vizgen_merscope/2022_vizgen_human_breast_cancer_merfish/rep1"
17-
gcloud_bucket: "vz-ffpe-showcase"
18-
dataset_bucket_name: "HumanBreastCancerPatient1"
17+
input: "gs://vz-ffpe-showcase/HumanBreastCancerPatient1"
1918
dataset_name: "Vizgen Human Breast Cancer MERFISH Patient1"
2019
dataset_url: "https://info.vizgen.com/ffpe-showcase?submissionGuid=a93dbab5-c128-4269-afe3-82ea2bf9cdaf"
2120
dataset_summary: "Human Breast Cancer data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -24,8 +23,7 @@ param_list:
2423
segmentation_id: ["cell"]
2524
2625
- id: "vizgen_merscope/2022_vizgen_human_liver_cancer_merfish/rep1"
27-
gcloud_bucket: "vz-ffpe-showcase"
28-
dataset_bucket_name: "HumanLiverCancerPatient1"
26+
input: "gs://vz-ffpe-showcase/HumanLiverCancerPatient1"
2927
dataset_name: "Vizgen Human Liver Cancer MERFISH Patient1"
3028
dataset_url: "https://info.vizgen.com/ffpe-showcase?submissionGuid=a93dbab5-c128-4269-afe3-82ea2bf9cdaf"
3129
dataset_summary: "Human Liver Cancer data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -34,8 +32,7 @@ param_list:
3432
segmentation_id: ["cell"]
3533
3634
- id: "vizgen_merscope/2022_vizgen_human_liver_cancer_merfish/rep2"
37-
gcloud_bucket: "vz-ffpe-showcase"
38-
dataset_bucket_name: "HumanLiverCancerPatient2"
35+
input: "gs://vz-ffpe-showcase/HumanLiverCancerPatient2"
3936
dataset_name: "Vizgen Human Liver Cancer MERFISH Patient2"
4037
dataset_url: "https://info.vizgen.com/ffpe-showcase?submissionGuid=a93dbab5-c128-4269-afe3-82ea2bf9cdaf"
4138
dataset_summary: "Human Liver Cancer data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -44,8 +41,7 @@ param_list:
4441
segmentation_id: ["cell"]
4542
4643
- id: "vizgen_merscope/2022_vizgen_human_lung_cancer_merfish/rep1"
47-
gcloud_bucket: "vz-ffpe-showcase"
48-
dataset_bucket_name: "HumanLungCancerPatient1"
44+
input: "gs://vz-ffpe-showcase/HumanLungCancerPatient1"
4945
dataset_name: "Vizgen Human Lung Cancer MERFISH Patient1"
5046
dataset_url: "https://info.vizgen.com/ffpe-showcase?submissionGuid=a93dbab5-c128-4269-afe3-82ea2bf9cdaf"
5147
dataset_summary: "Human Lung Cancer data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -54,8 +50,7 @@ param_list:
5450
segmentation_id: ["cell"]
5551
5652
- id: "vizgen_merscope/2022_vizgen_human_lung_cancer_merfish/rep2"
57-
gcloud_bucket: "vz-ffpe-showcase"
58-
dataset_bucket_name: "HumanLungCancerPatient2"
53+
input: "gs://vz-ffpe-showcase/HumanLungCancerPatient2"
5954
dataset_name: "Vizgen Human Lung Cancer MERFISH Patient2"
6055
dataset_url: "https://info.vizgen.com/ffpe-showcase?submissionGuid=a93dbab5-c128-4269-afe3-82ea2bf9cdaf"
6156
dataset_summary: "Human Lung Cancer data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -78,14 +73,11 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
7873
--config common/nextflow_helpers/labels_tw.config \
7974
--labels datasets,vizgen_merscope
8075

81-
82-
8376
# More datasets that can be simply added:
8477
# TODO: Make a decision on replicate naming (see ovarian cancer replicate that has multiple slices)
8578

8679
# - id: "vizgen_merscope/2022_vizgen_human_colon_cancer_merfish/rep1"
87-
# gcloud_bucket: "vz-ffpe-showcase"
88-
# dataset_bucket_name: "HumanColonCancerPatient1"
80+
# input: "gs://vz-ffpe-showcase/HumanColonCancerPatient1"
8981
# dataset_name: "2022 Vizgen Human Colon Cancer MERFISH Patient1"
9082
# dataset_url: "https://info.vizgen.com/ffpe-showcase"
9183
# dataset_summary: "Human Colon Cancer data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -94,8 +86,7 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
9486
# segmentation_id: ["cell"]
9587

9688
# - id: "vizgen_merscope/2022_vizgen_human_colon_cancer_merfish/rep2"
97-
# gcloud_bucket: "vz-ffpe-showcase"
98-
# dataset_bucket_name: "HumanColonCancerPatient2"
89+
# input: "gs://vz-ffpe-showcase/HumanColonCancerPatient2"
9990
# dataset_name: "2022 Vizgen Human Colon Cancer MERFISH Patient2"
10091
# dataset_url: "https://info.vizgen.com/ffpe-showcase"
10192
# dataset_summary: "Human Colon Cancer data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -104,8 +95,7 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
10495
# segmentation_id: ["cell"]
10596

10697
# - id: "vizgen_merscope/2022_vizgen_human_melanoma_merfish/rep1"
107-
# gcloud_bucket: "vz-ffpe-showcase"
108-
# dataset_bucket_name: "HumanMelanomaPatient1"
98+
# input: "gs://vz-ffpe-showcase/HumanMelanomaPatient1"
10999
# dataset_name: "2022 Vizgen Human Melanoma MERFISH Patient1"
110100
# dataset_url: "https://info.vizgen.com/ffpe-showcase"
111101
# dataset_summary: "Human Melanoma data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -114,8 +104,7 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
114104
# segmentation_id: ["cell"]
115105

116106
# - id: "vizgen_merscope/2022_vizgen_human_melanoma_merfish/rep2"
117-
# gcloud_bucket: "vz-ffpe-showcase"
118-
# dataset_bucket_name: "HumanMelanomaPatient2"
107+
# input: "gs://vz-ffpe-showcase/HumanMelanomaPatient2"
119108
# dataset_name: "2022 Vizgen Human Melanoma MERFISH Patient2"
120109
# dataset_url: "https://info.vizgen.com/ffpe-showcase"
121110
# dataset_summary: "Human Melanoma data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -124,8 +113,7 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
124113
# segmentation_id: ["cell"]
125114

126115
# - id: "vizgen_merscope/2022_vizgen_human_ovarian_cancer_merfish/rep1"
127-
# gcloud_bucket: "vz-ffpe-showcase"
128-
# dataset_bucket_name: "HumanOvarianCancerPatient1"
116+
# input: "gs://vz-ffpe-showcase/HumanOvarianCancerPatient1"
129117
# dataset_name: "2022 Vizgen Human Ovarian Cancer MERFISH Patient1"
130118
# dataset_url: "https://info.vizgen.com/ffpe-showcase"
131119
# dataset_summary: "Human Ovarian Cancer data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -135,8 +123,7 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
135123

136124
# # Patient 2 has multiple slices
137125
# - id: "vizgen_merscope/2022_vizgen_human_ovarian_cancer_merfish/rep2_slice1"
138-
# gcloud_bucket: "vz-ffpe-showcase"
139-
# dataset_bucket_name: "HumanOvarianCancerPatient2Slice1"
126+
# input: "gs://vz-ffpe-showcase/HumanOvarianCancerPatient2Slice1"
140127
# dataset_name: "2022 Vizgen Human Ovarian Cancer MERFISH Patient2 Slice1"
141128
# dataset_url: "https://info.vizgen.com/ffpe-showcase"
142129
# dataset_summary: "Human Ovarian Cancer data (Slice 1) from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -145,8 +132,7 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
145132
# segmentation_id: ["cell"]
146133

147134
# - id: "vizgen_merscope/2022_vizgen_human_ovarian_cancer_merfish/rep2_slice2"
148-
# gcloud_bucket: "vz-ffpe-showcase"
149-
# dataset_bucket_name: "HumanOvarianCancerPatient2Slice2"
135+
# input: "gs://vz-ffpe-showcase/HumanOvarianCancerPatient2Slice2"
150136
# dataset_name: "2022 Vizgen Human Ovarian Cancer MERFISH Patient2 Slice2"
151137
# dataset_url: "https://info.vizgen.com/ffpe-showcase"
152138
# dataset_summary: "Human Ovarian Cancer data (Slice 2) from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -155,8 +141,7 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
155141
# segmentation_id: ["cell"]
156142

157143
# - id: "vizgen_merscope/2022_vizgen_human_ovarian_cancer_merfish/rep2_slice3"
158-
# gcloud_bucket: "vz-ffpe-showcase"
159-
# dataset_bucket_name: "HumanOvarianCancerPatient2Slice3"
144+
# input: "gs://vz-ffpe-showcase/HumanOvarianCancerPatient2Slice3"
160145
# dataset_name: "2022 Vizgen Human Ovarian Cancer MERFISH Patient2 Slice3"
161146
# dataset_url: "https://info.vizgen.com/ffpe-showcase"
162147
# dataset_summary: "Human Ovarian Cancer data (Slice 3) from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -165,8 +150,7 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
165150
# segmentation_id: ["cell"]
166151

167152
# - id: "vizgen_merscope/2022_vizgen_human_prostate_cancer_merfish/rep1"
168-
# gcloud_bucket: "vz-ffpe-showcase"
169-
# dataset_bucket_name: "HumanProstateCancerPatient1"
153+
# input: "gs://vz-ffpe-showcase/HumanProstateCancerPatient1"
170154
# dataset_name: "2022 Vizgen Human Prostate Cancer MERFISH Patient1"
171155
# dataset_url: "https://info.vizgen.com/ffpe-showcase"
172156
# dataset_summary: "Human Prostate Cancer data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -175,8 +159,7 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
175159
# segmentation_id: ["cell"]
176160

177161
# - id: "vizgen_merscope/2022_vizgen_human_prostate_cancer_merfish/rep2"
178-
# gcloud_bucket: "vz-ffpe-showcase"
179-
# dataset_bucket_name: "HumanProstateCancerPatient2"
162+
# input: "gs://vz-ffpe-showcase/HumanProstateCancerPatient2"
180163
# dataset_name: "2022 Vizgen Human Prostate Cancer MERFISH Patient2"
181164
# dataset_url: "https://info.vizgen.com/ffpe-showcase"
182165
# dataset_summary: "Human Prostate Cancer data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -185,8 +168,7 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
185168
# segmentation_id: ["cell"]
186169

187170
# - id: "vizgen_merscope/2022_vizgen_human_uterine_cancer_merfish/rep1"
188-
# gcloud_bucket: "vz-ffpe-showcase"
189-
# dataset_bucket_name: "HumanUterineCancerPatient1"
171+
# input: "gs://vz-ffpe-showcase/HumanUterineCancerPatient1"
190172
# dataset_name: "2022 Vizgen Human Uterine Cancer MERFISH Patient1"
191173
# dataset_url: "https://info.vizgen.com/ffpe-showcase"
192174
# dataset_summary: "Human Uterine Cancer data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
@@ -195,8 +177,7 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
195177
# segmentation_id: ["cell"]
196178

197179
# - id: "vizgen_merscope/2022_vizgen_human_uterine_cancer_merfish/rep2"
198-
# gcloud_bucket: "vz-ffpe-showcase"
199-
# dataset_bucket_name: "HumanUterineCancerPatient2"
180+
# input: "gs://vz-ffpe-showcase/HumanUterineCancerPatient2"
200181
# dataset_name: "2022 Vizgen Human Uterine Cancer MERFISH Patient2"
201182
# dataset_url: "https://info.vizgen.com/ffpe-showcase"
202183
# dataset_summary: "Human Uterine Cancer data from the MERSCOPE FFPE Human Immuno-Oncology Data Release."
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
#!/bin/bash
#
# Launches the process_wu_human_breast_cancer_sc workflow via `tw launch`
# and then syncs the published dataset folder from S3 into the local
# resources/datasets directory.
#
# Requires `git`, `tw` and `aws` on PATH, and must be started from inside a
# checkout of the repository.

# Enable strict mode before anything else: if `git rev-parse` fails (e.g. the
# script is started outside a git checkout), REPO_ROOT would be empty and
# `cd ""` would silently leave us in the wrong directory.
set -euo pipefail

# get the root of the repository
REPO_ROOT=$(git rev-parse --show-toplevel)

# ensure that the commands below are run from the root of the repository
cd "$REPO_ROOT"

publish_dir="s3://openproblems-data/resources/datasets"

# Write the params file into a private temporary directory instead of a fixed
# /tmp/params.yaml, so that concurrent runs of the resource scripts cannot
# overwrite each other's parameters. The directory is removed on exit.
tmp_dir=$(mktemp -d)
trap 'rm -rf -- "$tmp_dir"' EXIT
params_file="$tmp_dir/params.yaml"

# Note that the current download script and processing workflow have a specific default parameter set for the given dataset.
# No additional datasets are supported by that component/workflow. Therefore the default parameters are used and don't need
# to be specified here.

# The heredoc delimiter is unquoted on purpose: $publish_dir is expanded by
# the shell, while \$id is escaped so that a literal "$id" ends up in the YAML.
# NOTE(review): the YAML lines are reproduced exactly as they appear in the
# reviewed source; confirm the intended nesting of cancer_subtypes.
cat > "$params_file" << HERE
param_list:
- id: wu_human_breast_cancer_sc/2021Wu_human_breast_cancer_sc
cancer_subtypes:
- HER2+
- TNBC
- ER+

keep_files: false

output_dataset: "\$id/dataset.h5ad"
output_meta: "\$id/dataset_meta.yaml"
output_state: "\$id/state.yaml"
publish_dir: "$publish_dir"
HERE

tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
  --revision build/main \
  --pull-latest \
  --main-script target/nextflow/datasets/workflows/process_wu_human_breast_cancer_sc/main.nf \
  --workspace 53907369739130 \
  --compute-env 6TeIFgV5OY4pJCk8I0bfOh \
  --params-file "$params_file" \
  --config common/nextflow_helpers/labels_tw.config \
  --labels datasets,wu_human_breast_cancer_sc

# NOTE(review): no --wait flag is passed to `tw launch`, so this sync
# presumably runs before the launched workflow has published its results;
# confirm whether it is meant to be re-run manually once the run has finished.
aws s3 sync \
  s3://openproblems-data/resources/datasets/wu_human_breast_cancer_sc/2021Wu_human_breast_cancer_sc \
  resources/datasets/wu_human_breast_cancer_sc/2021Wu_human_breast_cancer_sc

scripts/create_test_resources/test_pipeline.sh

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -75,17 +75,18 @@ viash run src/metrics/similarity/config.vsh.yaml -- \
7575

7676
# create a state file
7777
cat >> $OUT_DIR/state.yaml <<EOL
78-
output_sp: $OUT_DIR/raw_ist.zarr
79-
output_sc: $OUT_DIR/scrnaseq_reference.h5ad
80-
output_segmentation: $OUT_DIR/segmentation.zarr
81-
output_transcript_assignments: $OUT_DIR/transcript_assignments.zarr
82-
output_spatial_aggregated_counts: $OUT_DIR/spatial_aggregated_counts.h5ad
83-
output_cell_volumes: $OUT_DIR/cell_volumes.h5ad
84-
output_spatial_normalized_counts: $OUT_DIR/spatial_normalized_counts.h5ad
85-
output_spatial_with_cell_types: $OUT_DIR/spatial_with_cell_types.h5ad
86-
output_spatial_corrected_counts: $OUT_DIR/spatial_corrected_counts.h5ad
87-
output_spatial_qc_col: $OUT_DIR/spatial_qc_col.h5ad
88-
output_score: $OUT_DIR/score.h5ad
78+
id: mouse_brain_combined
79+
output_sp: !file raw_ist.zarr
80+
output_sc: !file scrnaseq_reference.h5ad
81+
output_segmentation: !file segmentation.zarr
82+
output_transcript_assignments: !file transcript_assignments.zarr
83+
output_spatial_aggregated_counts: !file spatial_aggregated_counts.h5ad
84+
output_cell_volumes: !file cell_volumes.h5ad
85+
output_spatial_normalized_counts: !file spatial_normalized_counts.h5ad
86+
output_spatial_with_cell_types: !file spatial_with_cell_types.h5ad
87+
output_spatial_corrected_counts: !file spatial_corrected_counts.h5ad
88+
output_spatial_qc_col: !file spatial_qc_col.h5ad
89+
output_score: !file score.h5ad
8990
EOL
9091

9192
# sync test resources

0 commit comments

Comments
 (0)