diff --git a/CHANGELOG.md b/CHANGELOG.md index 3706fa5bb..253eb57dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,17 @@ # Changelog -## v0.0.24 +## v0.0.24 (in progress) +- Release MERFISH +- Add MERFISH directory schema +- Fix documentation issue for MERFISH +- Add CEDAR link for MERFISH +- Update MERFISH directory schema +- Update Phenocycler docs +- Update MERFISH directory schema +- Add next-gen Cell DIVE directory schema +- Update MIBI directory schema +- Update Visium no probes directory schema +- Add Cell DIVE to index - Change to EntityTypeInfo constraint format to support constraints endpoint ## v0.0.23 diff --git a/docs/celldive/current/index.md b/docs/celldive/current/index.md index 497f98384..04215b0cb 100644 --- a/docs/celldive/current/index.md +++ b/docs/celldive/current/index.md @@ -3,7 +3,7 @@ title: Cell DIVE schema_name: celldive category: Multiplex Fluorescence Based Experiment (MxFBE) all_versions_deprecated: False -exclude_from_index: True +exclude_from_index: False layout: default --- @@ -28,5 +28,24 @@ Related files:
## Directory schemas -Version 2.0 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) +Version 2.0 (use this one) + +| pattern | required? | description | +| --- | --- | --- | +| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. [Exists in all assays] | +| extras\/microscope_hardware\.json | ✓ | **[QA/QC]** A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| extras\/microscope_settings\.json | | **[QA/QC]** A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| raw\/.* | ✓ | This is a directory containing raw data. | +| raw\/images\/.* | ✓ | Raw image files. Using this subdirectory allows for harmonization with other more complex assays, like Visium that includes both raw imaging and sequencing data. | +| raw\/images\/round_info_[^\/]+\.dat (example: raw/images/round_info_002.dat) | ✓ | Metadata file for the capture item-value tab separated format. This contains various instrument and acquisition details for each acquisition cycle. | +| lab_processed\/.* | ✓ | Experiment files that were processed by the lab generating the data. | +| lab_processed\/images\/.* | ✓ | This is a directory containing processed image files | +| lab_processed\/images\/region_[^\/]+\/[^\/]+_region_[^\/]+\.ome\.(?:tif|tiff) (example: lab_processed/images/region_001/S20030092_region_011.ome.tif) | ✓ | OME TIFF Files for the corresponding region (e.g. region_001) by slide (e.g S20030077), organized into subdirectories based on their region. 
| +| lab_processed\/images\/region_[^\/]+\/[^\/]*ome-tiff\.channels\.csv | ✓ | This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. The required fields are detailed | +| lab_processed\/annotations\/.* | ✓ | This is a directory containing annotations. | +| lab_processed\/annotations\/slide_list\.txt | ✓ | Information about the slides used by the experiment- each line corresponds to a slide name (begins with S - e.g. S20030077) - used in filenames. | +| lab_processed\/virtual_histology\/.* | ✓ | This is a directory containing annotations for virtual histology images | +| lab_processed\/virtual_histology\/HandE_RGB_thumbnail\.jpg | ✓ | Virtual H&E RGB thumbnail | +| lab_processed\/virtual_histology\/HandE_RGB\.tif | ✓ | Virtual H&E RGB image | +| lab_processed\/virtual_histology\/[^\/]+_VHE_region_[^\/]+\.tif | ✓ | Virtual H&E image | diff --git a/docs/merfish/current/index.md b/docs/merfish/current/index.md index 8268b203f..f518b3ad7 100644 --- a/docs/merfish/current/index.md +++ b/docs/merfish/current/index.md @@ -3,31 +3,105 @@ title: MERFISH schema_name: merfish category: Fluorescence In Situ Hybridization (FISH) all_versions_deprecated: False -exclude_from_index: True +exclude_from_index: False layout: default --- +Prepare your metadata based on the latest metadata schema using one of the template files below. See the instructions in the [Metadata Validation Workflow](https://docs.google.com/document/d/1lfgiDGbyO4K4Hz1FMsJjmJd9RdwjShtJqFYNwKpbcZY) document for more information on preparing and validating your metadata.tsv file prior to submission. Related files: -Excel and TSV templates for this schema will be available when the draft next-generation schema, to be used in all future submissions, is finalized (no later than Sept. 30). 
+ +- [📝 Excel template](https://raw.githubusercontent.com/hubmapconsortium/dataset-metadata-spreadsheet/main/merfish/latest/merfish.xlsx): For metadata entry. +- [📝 TSV template](https://raw.githubusercontent.com/hubmapconsortium/dataset-metadata-spreadsheet/main/merfish/latest/merfish.tsv): Alternative for metadata entry. + [This link](https://docs.google.com/spreadsheets/d/1YnmdTAA0Z9MKN3OjR3Sca8pz-LNQll91wdQoRPSP6Q4/edit#gid=0) lists the set of fields that are required in the OME TIFF file XML header. ## Metadata schema -Version 2 (use this one) (draft - submission of data prepared using this schema will be supported by Sept. 30) (TBD) +Version 2 (use this one)
## Directory schemas -Version 2.0 (use this one) +Version 2.2 (use this one) + +| pattern | required? | description | +| --- | --- | --- | +| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. | +| extras\/microscope_hardware\.json | ✓ | **[QA/QC]** A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| extras\/microscope_settings\.json | | **[QA/QC]** A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| raw\/.* | ✓ | All raw data files for the experiment. | +| raw\/additional_panels_used\.csv | | If multiple commercial probe panels were used, then the primary probe panel should be selected in the "oligo_probe_panel" metadata field. The additional panels must be included in this file. Each panel record should include:manufacturer, model/name, product code. | +| raw\/gene_panel\.csv | ✓ | The list of target genes. The expected format is gene_id (ensembl ID), gene_name. | +| raw\/custom_probe_set\.csv | | This file should contain any custom probes used and must be included if the metadata field "is_custom_probes_used" is "Yes". The file should minimally include:target gene id, probe seq, probe id. The contents of this file are modeled after the 10x Genomics probe set file (see ). | +| raw\/micron_to_mosaic_pixel_transform\.csv | | Matrix used to transform from pixels to physical distance. | +| raw\/manifest\.json | ✓ | This file contains stain by channel details and pixel details. | +| raw\/codebook\.csv | ✓ | CSV containing codebook information for the experiment. Rows are barcodes and columns are imaging rounds. 
The first column is the barcode target, and the following column IDs are expected to be sequential, and round identifiers are expected to be integers (not roman numerals). | +| raw\/positions\.csv | ✓ | File that includes the top left coordinate of each tiled image. This is required to stitch the images. | +| raw\/dataorganization\.csv | ✓ | Necessary image definitions | +| raw\/[^\/]+\.DAX | ✓ | The raw image stack. | +| raw\/images\/.* | ✓ | Directory containing raw image files. This directory should include at least one raw file. | +| raw\/images\/[^\/]+\.tif | ✓ | Raw microscope file for the experiment. | +| lab_processed\/.* | ✓ | Experiment files that were processed by the lab generating the data. | +| lab_processed\/detected_transcripts\.csv | ✓ | A file containing the locations of each RNA target. | +| lab_processed\/images\/.* | ✓ | Processed image files | +| lab_processed\/images\/[^\/]+\.ome\.tiff (example: lab_processed/images/HBM892.MDXS.293.ome.tiff) | ✓ | OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use loss-less compression algorithm. For Visium this stitched file should only include the single capture area relevant to the current dataset. For GeoMx there will be one OME TIFF file per slide, with each slide including multiple AOIs. See the following link for the set of fields that are required in the OME TIFF file XML header. | +| lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv | ✓ | This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. The required fields are detailed | + +Version 2.1 + +| pattern | required? | description | +| --- | --- | --- | +| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. 
| +| extras\/microscope_hardware\.json | ✓ | **[QA/QC]** A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| extras\/microscope_settings\.json | | **[QA/QC]** A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| raw\/.* | ✓ | All raw data files for the experiment. | +| raw\/additional_panels_used\.csv | | If multiple commercial probe panels were used, then the primary probe panel should be selected in the "oligo_probe_panel" metadata field. The additional panels must be included in this file. Each panel record should include:manufacturer, model/name, product code. | +| raw\/gene_panel\.csv | ✓ | The list of target genes. | +| raw\/custom_probe_set\.csv | | This file should contain any custom probes used and must be included if the metadata field "is_custom_probes_used" is "Yes". The file should minimally include:target gene id, probe seq, probe id. The contents of this file are modeled after the 10x Genomics probe set file (see ). | +| raw\/micron_to_mosaic_pixel_transform\.csv | | Matrix used to transform from pixels to physical distance. | +| raw\/manifest\.json | ✓ | This file contains stain by channel details and pixel details. | +| raw\/codebook\.csv | ✓ | CSV containing codebook information for the experiment. Rows are barcodes and columns are imaging rounds. The first column is the barcode target, and the following column IDs are expected to be sequential, and round identifiers are expected to be integers (not roman numerals). | +| raw\/positions\.csv | ✓ | File that includes the top left coordinate of each tiled image. This is required to stitch the images. 
| +| raw\/dataorganization\.csv | ✓ | Necessary image definitions | +| raw\/data\/.* | ✓ | All raw stack data files for the MERFISH experiment. | +| raw\/data\/[^\/]+\.dax | ✓ | The raw image stack. | +| raw\/data\/[^\/]+\.inf | ✓ | Information file with dax image format specifications. Variable expected for downstream processing with PIPEFISH are frame dimensions, number of frames, little/big endian, stage X and Y locations, lock target, scalemin, and scalemax. | +| raw\/images\/.* | ✓ | Directory containing raw image files. This directory should include at least one raw file. | +| raw\/images\/[^\/]+\.tif | ✓ | Raw microscope file for the experiment. | +| lab_processed\/.* | ✓ | Experiment files that were processed by the lab generating the data. | +| lab_processed\/detected_transcripts\.csv | ✓ | A file containing the locations of each RNA target. | +| lab_processed\/images\/.* | ✓ | Processed image files | +| lab_processed\/images\/[^\/]+\.ome\.tiff (example: lab_processed/images/HBM892.MDXS.293.ome.tiff) | ✓ | OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use loss-less compression algorithm. For Visium this stitched file should only include the single capture area relevant to the current dataset. For GeoMx there will be one OME TIFF file per slide, with each slide including multiple AOIs. See the following link for the set of fields that are required in the OME TIFF file XML header. | +| lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv | ✓ | This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. The required fields are detailed | + +Version 2.0 | pattern | required? | description | | --- | --- | --- | -| TODO | ✓ | Directory structure not yet specified. | -| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. 
[Exists in all assays] | +| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. | +| extras\/microscope_hardware\.json | ✓ | **[QA/QC]** A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| extras\/microscope_settings\.json | | **[QA/QC]** A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| raw\/.* | ✓ | All raw data files for the experiment. | +| raw\/additional_panels_used\.csv | | If multiple commercial probe panels were used, then the primary probe panel should be selected in the "oligo_probe_panel" metadata field. The additional panels must be included in this file. Each panel record should include:manufacturer, model/name, product code. | +| raw\/gene_panel\.csv | ✓ | The list of target genes. | +| raw\/custom_probe_set\.csv | | This file should contain any custom probes used and must be included if the metadata field "is_custom_probes_used" is "Yes". The file should minimally include:target gene id, probe seq, probe id. The contents of this file are modeled after the 10x Genomics probe set file (see ). | +| raw\/micron_to_mosaic_pixel_transform\.csv | | Matrix used to transform from pixels to physical distance. | +| raw\/manifest\.json | ✓ | This file contains stain by channel details and pixel details. | +| raw\/codebook\.csv | ✓ | CSV containing codebook information for the experiment. Rows are barcodes and columns are imaging rounds. The first column is the barcode target, and the following column IDs are expected to be sequential, and round identifiers are expected to be integers (not roman numerals). | +| raw\/positions\.csv | ✓ | File that includes the top left coordinate of each tiled image. 
This is required to stitch the images. | +| raw\/dataorganization\.csv | ✓ | Necessary image definitions | +| raw\/[^\/]+\.DAX | ✓ | The raw image stack. | +| raw\/images\/.* | ✓ | Directory containing raw image files. This directory should include at least one raw file. | +| raw\/images\/[^\/]+\.tif | ✓ | Raw microscope file for the experiment. | +| lab_processed\/.* | ✓ | Experiment files that were processed by the lab generating the data. | +| lab_processed\/detected_transcripts\.csv | ✓ | A file containing the locations of each RNA target. | +| lab_processed\/images\/.* | ✓ | Processed image files | +| lab_processed\/images\/[^\/]+\.ome\.tiff (example: lab_processed/images/HBM892.MDXS.293.ome.tiff) | ✓ | OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use loss-less compression algorithm. For Visium this stitched file should only include the single capture area relevant to the current dataset. For GeoMx there will be one OME TIFF file per slide, with each slide including multiple AOIs. See the following link for the set of fields that are required in the OME TIFF file XML header. | +| lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv | ✓ | This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. The required fields are detailed | diff --git a/docs/mibi/current/index.md b/docs/mibi/current/index.md index 5da6052ad..8cbd8e9c3 100644 --- a/docs/mibi/current/index.md +++ b/docs/mibi/current/index.md @@ -28,7 +28,23 @@ Related files:
## Directory schemas -Version 2.0 (use this one) +Version 2.1 (use this one) + +| pattern | required? | description | +| --- | --- | --- | +| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset. [Exists in all assays] | +| extras\/microscope_hardware\.json | ✓ | **[QA/QC]** A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| extras\/microscope_settings\.json | | **[QA/QC]** A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| raw\/.* | ✓ | This is a directory containing raw data. | +| raw\/images\/.* | ✓ | Raw image files. Using this subdirectory allows for harmonization with other more complex assays, like Visium that includes both raw imaging and sequencing data. | +| raw\/images\/[^\/]+\.ome\.tiff | | Raw image file. | +| raw\/images\/tiles\.csv | | This file contains the approximate coordinates for each of the tiled raw images. | +| lab_processed\/.* | ✓ | Experiment files that were processed by the lab generating the data. | +| lab_processed\/images\/.* | ✓ | This is a directory containing processed image files | +| lab_processed\/images\/[^\/]+\.ome\.tiff | ✓ | OME-TIFF file (multichannel, multi-layered) produced by the experiment. If compressed, must use loss-less compression algorithm. See the following link for the set of fields that are required in the OME TIFF file XML header. | +| lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv | ✓ | This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. The required fields are detailed | + +Version 2.0 | pattern | required? 
| description | | --- | --- | --- | diff --git a/docs/phenocycler/current/index.md b/docs/phenocycler/current/index.md index a4e2d9cf0..5b65c9ba0 100644 --- a/docs/phenocycler/current/index.md +++ b/docs/phenocycler/current/index.md @@ -16,7 +16,7 @@ Related files: - [📝 TSV template](https://raw.githubusercontent.com/hubmapconsortium/dataset-metadata-spreadsheet/main/phenocycler/latest/phenocycler.tsv): Alternative for metadata entry. -[This link](https://docs.google.com/spreadsheets/d/1YnmdTAA0Z9MKN3OjR3Sca8pz-LNQll91wdQoRPSP6Q4/edit#gid=0) lists the set of fields that are required in the OME TIFF file XML header. + ## Metadata schema diff --git a/docs/visium-no-probes/current/index.md b/docs/visium-no-probes/current/index.md index f6d37d4d5..c8254609e 100644 --- a/docs/visium-no-probes/current/index.md +++ b/docs/visium-no-probes/current/index.md @@ -30,7 +30,29 @@ REQUIRED - For this assay, you must also prepare and submit two additional metad
## Directory schemas -Version 2.1 (use this one) +Version 2.2 (use this one) + +| pattern | required? | description | +| --- | --- | --- | +| extras\/.* | ✓ | Folder for general lab-specific files related to the dataset | +| extras\/microscope_hardware\.json | ✓ | **[QA/QC]** A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| extras\/microscope_settings\.json | | **[QA/QC]** A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. | +| raw\/.* | ✓ | All raw data files for the experiment. | +| raw\/[^\/]+\.gpr | ✓ | This is a 10X Genomics layout file that's generated by 10X and individualized for each Visium slide. This is a text file and can be generated using this 10X web form along with the unique 10X Visium slide ID. | +| raw\/fastq\/.* | ✓ | Raw sequencing files for the experiment | +| raw\/fastq\/RNA\/.* | ✓ | Directory containing fastq files pertaining to RNAseq sequencing. | +| raw\/fastq\/RNA\/[^\/]+_R[^\/]+\.fastq\.gz | ✓ | This is a GZip'd version of the forward and reverse fastq files from RNAseq sequencing (R1 and R2). | +| raw\/images\/.* | ✓ | Directory containing raw image files. This directory should include at least one raw file. | +| raw\/images\/[^\/]+\.(?:xml|scn|vsi|svs|czi|tiff) | ✓ | Raw microscope file for the experiment | +| lab_processed\/.* | ✓ | Experiment files that were processed by the lab generating the data. | +| lab_processed\/images\/.* | ✓ | Processed image files | +| lab_processed\/images\/[^\/]+\.ome\.tiff (example: lab_processed/images/HBM892.MDXS.293.ome.tiff) | ✓ | OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use loss-less compression algorithm. 
For Visium this stitched file should only include the single capture area relevant to the current dataset. For GeoMx there will be one OME TIFF file per slide, with each slide including multiple AOIs. See the following link for the set of fields that are required in the OME TIFF file XML header. | +| lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv | ✓ | This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. The required fields are detailed | +| lab_processed\/images\/[^\/]+\.json | | This file is the output from LoupeBrowser, when a data provider manually denotes which spots on the slide contain tissue. This file is optionally used by 10X SpaceRanger. | +| lab_processed\/transformations\/.* | | This directory contains transformation matrices that capture how each modality is aligned with the other and can be used to visualize overlays of multimodal data. This is needed to overlay images from the exact same tissue section (e.g., MALDI imaging mass spec, autofluorescence microscopy, MxIF, histological stains). In these cases data type may have different pixel sizes and slightly different orientations (i.e., one may be rotated relative to another). | +| lab_processed\/transformations\/[^\/]+\.txt | | Transformation matrices used to overlay images from the exact same tissue section (e.g., MALDI imaging mass spec, autofluorescence microscopy, MxIF, histological stains). | + +Version 2.1 | pattern | required? 
| description | dependent on | | --- | --- | --- | --- | diff --git a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md index 25e2f7577..23c54c7f9 100644 --- a/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md +++ b/examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/README.md @@ -1,7 +1,7 @@ ``` Directory Errors: ? examples/dataset-examples/bad-cedar-multi-assay-visium-bad-dir-structure/upload/Visium_9OLC_A4_S1 - (as visium-no-probes-v2.1) + (as visium-no-probes-v2.2) : - Required but missing: - lab_processed\/.*. - lab_processed\/images\/.*. diff --git a/examples/dataset-examples/good-cedar-multi-assay-visium/README.md b/examples/dataset-examples/good-cedar-multi-assay-visium/README.md index f9d0d9aeb..1ccf5c9a7 100644 --- a/examples/dataset-examples/good-cedar-multi-assay-visium/README.md +++ b/examples/dataset-examples/good-cedar-multi-assay-visium/README.md @@ -7,14 +7,14 @@ TSVs: good-visium-assay-metadata.tsv: Schema: visium-no-probes-v2 Metadata schema version: '2' - Directory schema version: visium-no-probes-v2.1 + Directory schema version: visium-no-probes-v2.2 good-visium-histology-metadata.tsv: Schema: h-and-e-v2 Metadata schema version: '2' - Directory schema version: visium-no-probes-v2.1 + Directory schema version: visium-no-probes-v2.2 good-visium-rnaseq-metadata.tsv: Schema: rnaseq-visium-no-probes-v2 Metadata schema version: '2' - Directory schema version: visium-no-probes-v2.1 + Directory schema version: visium-no-probes-v2.2 ``` \ No newline at end of file diff --git a/src/ingest_validation_tools/directory-schemas/celldive-v2.0.yaml b/src/ingest_validation_tools/directory-schemas/celldive-v2.0.yaml index e8d46e7ee..2ff10e7c9 100644 --- a/src/ingest_validation_tools/directory-schemas/celldive-v2.0.yaml +++ b/src/ingest_validation_tools/directory-schemas/celldive-v2.0.yaml @@ 
-1,5 +1,71 @@ -# GE CellDIVE Directory Schema -draft: true files: - - draft_link: 'https://docs.google.com/spreadsheets/d/1pZD2e51e4QkxzIk6xjHPPu1RBZpx5mzoykMmlaDK8rA' \ No newline at end of file + pattern: extras\/.* + required: True + description: Folder for general lab-specific files related to the dataset. [Exists in all assays] + - + pattern: extras\/microscope_hardware\.json + required: True + description: A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. + is_qa_qc: True + - + pattern: extras\/microscope_settings\.json + required: False + description: A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. + is_qa_qc: True + - + pattern: raw\/.* + required: True + description: This is a directory containing raw data. + - + pattern: raw\/images\/.* + required: True + description: Raw image files. Using this subdirectory allows for harmonization with other more complex assays, like Visium that includes both raw imaging and sequencing data. + - + pattern: raw\/images\/round_info_[^\/]+\.dat + required: True + description: Metadata file for the capture item-value tab separated format. This contains various instrument and acquisition details for each acquisition cycle. + is_qa_qc: False + example: raw/images/round_info_002.dat + - + pattern: lab_processed\/.* + required: True + description: Experiment files that were processed by the lab generating the data. + - + pattern: lab_processed\/images\/.* + required: True + description: This is a directory containing processed image files + - + pattern: lab_processed\/images\/region_[^\/]+\/[^\/]+_region_[^\/]+\.ome\.(?:tif|tiff) + required: True + description: OME TIFF Files for the corresponding region (e.g. 
region_001) by slide (e.g S20030077), organized into subdirectories based on their region. + is_qa_qc: False + example: lab_processed/images/region_001/S20030092_region_011.ome.tif + - + pattern: lab_processed\/images\/region_[^\/]+\/[^\/]*ome-tiff\.channels\.csv + required: True + description: This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. The required fields are detailed + - + pattern: lab_processed\/annotations\/.* + required: True + description: This is a directory containing annotations. + - + pattern: lab_processed\/annotations\/slide_list\.txt + required: True + description: Information about the slides used by the experiment- each line corresponds to a slide name (begins with S - e.g. S20030077) - used in filenames. + - + pattern: lab_processed\/virtual_histology\/.* + required: True + description: This is a directory containing annotations for virtual histology images + - + pattern: lab_processed\/virtual_histology\/HandE_RGB_thumbnail\.jpg + required: True + description: Virtual H&E RGB thumbnail + - + pattern: lab_processed\/virtual_histology\/HandE_RGB\.tif + required: True + description: Virtual H&E RGB image + - + pattern: lab_processed\/virtual_histology\/[^\/]+_VHE_region_[^\/]+\.tif + required: True + description: Virtual H&E image diff --git a/src/ingest_validation_tools/directory-schemas/merfish-v2.0.yaml b/src/ingest_validation_tools/directory-schemas/merfish-v2.0.yaml index 3042b8779..037829b4f 100644 --- a/src/ingest_validation_tools/directory-schemas/merfish-v2.0.yaml +++ b/src/ingest_validation_tools/directory-schemas/merfish-v2.0.yaml @@ -1,9 +1,86 @@ files: - - pattern: 'TODO' - description: 'Directory structure not yet specified.' + pattern: extras\/.* required: True + description: Folder for general lab-specific files related to the dataset. 
- - pattern: extras\/.* + pattern: extras\/microscope_hardware\.json + required: True + description: A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. + is_qa_qc: True + - + pattern: extras\/microscope_settings\.json + required: False + description: A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. + is_qa_qc: True + - + pattern: raw\/.* + required: True + description: All raw data files for the experiment. + - + pattern: raw\/additional_panels_used\.csv + required: False + description: If multiple commercial probe panels were used, then the primary probe panel should be selected in the "oligo_probe_panel" metadata field. The additional panels must be included in this file. Each panel record should include:manufacturer, model/name, product code. + - + pattern: raw\/gene_panel\.csv + required: True + description: The list of target genes. + - + pattern: raw\/custom_probe_set\.csv + required: False + description: This file should contain any custom probes used and must be included if the metadata field "is_custom_probes_used" is "Yes". The file should minimally include:target gene id, probe seq, probe id. The contents of this file are modeled after the 10x Genomics probe set file (see ). + - + pattern: raw\/micron_to_mosaic_pixel_transform\.csv + required: False + description: Matrix used to transform from pixels to physical distance. + - + pattern: raw\/manifest\.json + required: True + description: This file contains stain by channel details and pixel details. + - + pattern: raw\/codebook\.csv + required: True + description: CSV containing codebook information for the experiment. Rows are barcodes and columns are imaging rounds. 
The first column is the barcode target, and the following column IDs are expected to be sequential, and round identifiers are expected to be integers (not roman numerals). + - + pattern: raw\/positions\.csv + required: True + description: File that includes the top left coordinate of each tiled image. This is required to stitch the images. + - + pattern: raw\/dataorganization\.csv + required: True + description: Necessary image definitions + - + pattern: raw\/[^\/]+\.DAX + required: True + description: The raw image stack. + - + pattern: raw\/images\/.* + required: True + description: Directory containing raw image files. This directory should include at least one raw file. + - + pattern: raw\/images\/[^\/]+\.tif + required: True + description: Raw microscope file for the experiment. + - + pattern: lab_processed\/.* + required: True + description: Experiment files that were processed by the lab generating the data. + - + pattern: lab_processed\/detected_transcripts\.csv + required: True + description: A file containing the locations of each RNA target. + - + pattern: lab_processed\/images\/.* + required: True + description: Processed image files + - + pattern: lab_processed\/images\/[^\/]+\.ome\.tiff + required: True + description: OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use loss-less compression algorithm. For Visium this stitched file should only include the single capture area relevant to the current dataset. For GeoMx there will be one OME TIFF file per slide, with each slide including multiple AOIs. See the following link for the set of fields that are required in the OME TIFF file XML header. + is_qa_qc: False + example: lab_processed/images/HBM892.MDXS.293.ome.tiff + - + pattern: lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv required: True - description: Folder for general lab-specific files related to the dataset. 
[Exists in all assays] + description: This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. The required fields are detailed + is_qa_qc: False diff --git a/src/ingest_validation_tools/directory-schemas/merfish-v2.1.yaml b/src/ingest_validation_tools/directory-schemas/merfish-v2.1.yaml new file mode 100644 index 000000000..5fdc699f3 --- /dev/null +++ b/src/ingest_validation_tools/directory-schemas/merfish-v2.1.yaml @@ -0,0 +1,94 @@ +files: + - + pattern: extras\/.* + required: True + description: Folder for general lab-specific files related to the dataset. + - + pattern: extras\/microscope_hardware\.json + required: True + description: A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. + is_qa_qc: True + - + pattern: extras\/microscope_settings\.json + required: False + description: A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. + is_qa_qc: True + - + pattern: raw\/.* + required: True + description: All raw data files for the experiment. + - + pattern: raw\/additional_panels_used\.csv + required: False + description: If multiple commercial probe panels were used, then the primary probe panel should be selected in the "oligo_probe_panel" metadata field. The additional panels must be included in this file. Each panel record should include:manufacturer, model/name, product code. + - + pattern: raw\/gene_panel\.csv + required: True + description: The list of target genes. + - + pattern: raw\/custom_probe_set\.csv + required: False + description: This file should contain any custom probes used and must be included if the metadata field "is_custom_probes_used" is "Yes". 
The file should minimally include:target gene id, probe seq, probe id. The contents of this file are modeled after the 10x Genomics probe set file (see ). + - + pattern: raw\/micron_to_mosaic_pixel_transform\.csv + required: False + description: Matrix used to transform from pixels to physical distance. + - + pattern: raw\/manifest\.json + required: True + description: This file contains stain by channel details and pixel details. + - + pattern: raw\/codebook\.csv + required: True + description: CSV containing codebook information for the experiment. Rows are barcodes and columns are imaging rounds. The first column is the barcode target, and the following column IDs are expected to be sequential, and round identifiers are expected to be integers (not roman numerals). + - + pattern: raw\/positions\.csv + required: True + description: File that includes the top left coordinate of each tiled image. This is required to stitch the images. + - + pattern: raw\/dataorganization\.csv + required: True + description: Necessary image definitions + - + pattern: raw\/data\/.* + required: True + description: All raw stack data files for the MERFISH experiment. + - + pattern: raw\/data\/[^\/]+\.dax + required: True + description: The raw image stack. + - + pattern: raw\/data\/[^\/]+\.inf + required: True + description: Information file with dax image format specifications. Variable expected for downstream processing with PIPEFISH are frame dimensions, number of frames, little/big endian, stage X and Y locations, lock target, scalemin, and scalemax. + - + pattern: raw\/images\/.* + required: True + description: Directory containing raw image files. This directory should include at least one raw file. + - + pattern: raw\/images\/[^\/]+\.tif + required: True + description: Raw microscope file for the experiment. + - + pattern: lab_processed\/.* + required: True + description: Experiment files that were processed by the lab generating the data. 
+ - + pattern: lab_processed\/detected_transcripts\.csv + required: True + description: A file containing the locations of each RNA target. + - + pattern: lab_processed\/images\/.* + required: True + description: Processed image files + - + pattern: lab_processed\/images\/[^\/]+\.ome\.tiff + required: True + description: OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use loss-less compression algorithm. For Visium this stitched file should only include the single capture area relevant to the current dataset. For GeoMx there will be one OME TIFF file per slide, with each slide including multiple AOIs. See the following link for the set of fields that are required in the OME TIFF file XML header. + is_qa_qc: False + example: lab_processed/images/HBM892.MDXS.293.ome.tiff + - + pattern: lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv + required: True + description: This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. The required fields are detailed + is_qa_qc: False diff --git a/src/ingest_validation_tools/directory-schemas/merfish-v2.2.yaml b/src/ingest_validation_tools/directory-schemas/merfish-v2.2.yaml new file mode 100644 index 000000000..65aee2a00 --- /dev/null +++ b/src/ingest_validation_tools/directory-schemas/merfish-v2.2.yaml @@ -0,0 +1,86 @@ +files: + - + pattern: extras\/.* + required: True + description: Folder for general lab-specific files related to the dataset. + - + pattern: extras\/microscope_hardware\.json + required: True + description: A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. 
+ is_qa_qc: True + - + pattern: extras\/microscope_settings\.json + required: False + description: A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. + is_qa_qc: True + - + pattern: raw\/.* + required: True + description: All raw data files for the experiment. + - + pattern: raw\/additional_panels_used\.csv + required: False + description: If multiple commercial probe panels were used, then the primary probe panel should be selected in the "oligo_probe_panel" metadata field. The additional panels must be included in this file. Each panel record should include:manufacturer, model/name, product code. + - + pattern: raw\/gene_panel\.csv + required: True + description: The list of target genes. The expected format is gene_id (ensembl ID), gene_name. + - + pattern: raw\/custom_probe_set\.csv + required: False + description: This file should contain any custom probes used and must be included if the metadata field "is_custom_probes_used" is "Yes". The file should minimally include:target gene id, probe seq, probe id. The contents of this file are modeled after the 10x Genomics probe set file (see ). + - + pattern: raw\/micron_to_mosaic_pixel_transform\.csv + required: False + description: Matrix used to transform from pixels to physical distance. + - + pattern: raw\/manifest\.json + required: True + description: This file contains stain by channel details and pixel details. + - + pattern: raw\/codebook\.csv + required: True + description: CSV containing codebook information for the experiment. Rows are barcodes and columns are imaging rounds. The first column is the barcode target, and the following column IDs are expected to be sequential, and round identifiers are expected to be integers (not roman numerals). 
+ - + pattern: raw\/positions\.csv + required: True + description: File that includes the top left coordinate of each tiled image. This is required to stitch the images. + - + pattern: raw\/dataorganization\.csv + required: True + description: Necessary image definitions + - + pattern: raw\/[^\/]+\.DAX + required: True + description: The raw image stack. + - + pattern: raw\/images\/.* + required: True + description: Directory containing raw image files. This directory should include at least one raw file. + - + pattern: raw\/images\/[^\/]+\.tif + required: True + description: Raw microscope file for the experiment. + - + pattern: lab_processed\/.* + required: True + description: Experiment files that were processed by the lab generating the data. + - + pattern: lab_processed\/detected_transcripts\.csv + required: True + description: A file containing the locations of each RNA target. + - + pattern: lab_processed\/images\/.* + required: True + description: Processed image files + - + pattern: lab_processed\/images\/[^\/]+\.ome\.tiff + required: True + description: OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use loss-less compression algorithm. For Visium this stitched file should only include the single capture area relevant to the current dataset. For GeoMx there will be one OME TIFF file per slide, with each slide including multiple AOIs. See the following link for the set of fields that are required in the OME TIFF file XML header. + is_qa_qc: False + example: lab_processed/images/HBM892.MDXS.293.ome.tiff + - + pattern: lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv + required: True + description: This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. 
The required fields are detailed + is_qa_qc: False diff --git a/src/ingest_validation_tools/directory-schemas/mibi-v2.1.yaml b/src/ingest_validation_tools/directory-schemas/mibi-v2.1.yaml new file mode 100644 index 000000000..f71ccc6cc --- /dev/null +++ b/src/ingest_validation_tools/directory-schemas/mibi-v2.1.yaml @@ -0,0 +1,48 @@ +files: + - + pattern: extras\/.* + required: True + description: Folder for general lab-specific files related to the dataset. [Exists in all assays] + - + pattern: extras\/microscope_hardware\.json + required: True + description: A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. + is_qa_qc: True + - + pattern: extras\/microscope_settings\.json + required: False + description: A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. + is_qa_qc: True + - + pattern: raw\/.* + required: True + description: This is a directory containing raw data. + - + pattern: raw\/images\/.* + required: True + description: Raw image files. Using this subdirectory allows for harmonization with other more complex assays, like Visium that includes both raw imaging and sequencing data. + - + pattern: raw\/images\/[^\/]+\.ome\.tiff + required: False + description: Raw image file. + - + pattern: raw\/images\/tiles\.csv + required: False + description: This file contains the approximate coordinates for each of the tiled raw images. + - + pattern: lab_processed\/.* + required: True + description: Experiment files that were processed by the lab generating the data. 
+ - + pattern: lab_processed\/images\/.* + required: True + description: This is a directory containing processed image files + - + pattern: lab_processed\/images\/[^\/]+\.ome\.tiff + required: True + description: OME-TIFF file (multichannel, multi-layered) produced by the experiment. If compressed, must use loss-less compression algorithm. See the following link for the set of fields that are required in the OME TIFF file XML header. + is_qa_qc: False + - + pattern: lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv + required: True + description: This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. The required fields are detailed diff --git a/src/ingest_validation_tools/directory-schemas/visium-no-probes-v2.2.yaml b/src/ingest_validation_tools/directory-schemas/visium-no-probes-v2.2.yaml new file mode 100644 index 000000000..67dbfee11 --- /dev/null +++ b/src/ingest_validation_tools/directory-schemas/visium-no-probes-v2.2.yaml @@ -0,0 +1,78 @@ +files: + - + pattern: extras\/.* + required: True + description: Folder for general lab-specific files related to the dataset + - + pattern: extras\/microscope_hardware\.json + required: True + description: A file generated by the micro-meta app that contains a description of the hardware components of the microscope. Email HuBMAP Consortium Help Desk if help is required in generating this document. + is_qa_qc: True + - + pattern: extras\/microscope_settings\.json + required: False + description: A file generated by the micro-meta app that contains a description of the settings that were used to acquire the image data. Email HuBMAP Consortium Help Desk if help is required in generating this document. + is_qa_qc: True + - + pattern: raw\/.* + required: True + description: All raw data files for the experiment. 
+ - + pattern: raw\/[^\/]+\.gpr + required: True + description: This is a 10X Genomics layout file that's generated by 10X and individualized for each Visium slide. This is a text file and can be generated using this 10X web form along with the unique 10X Visium slide ID. + is_qa_qc: False + - + pattern: raw\/fastq\/.* + required: True + description: Raw sequencing files for the experiment + - + pattern: raw\/fastq\/RNA\/.* + required: True + description: Directory containing fastq files pertaining to RNAseq sequencing. + - + pattern: raw\/fastq\/RNA\/[^\/]+_R[^\/]+\.fastq\.gz + required: True + description: This is a GZip'd version of the forward and reverse fastq files from RNAseq sequencing (R1 and R2). + is_qa_qc: False + - + pattern: raw\/images\/.* + required: True + description: Directory containing raw image files. This directory should include at least one raw file. + - + pattern: raw\/images\/[^\/]+\.(?:xml|scn|vsi|svs|czi|tiff) + required: True + description: Raw microscope file for the experiment + is_qa_qc: False + - + pattern: lab_processed\/.* + required: True + description: Experiment files that were processed by the lab generating the data. + - + pattern: lab_processed\/images\/.* + required: True + description: Processed image files + - + pattern: lab_processed\/images\/[^\/]+\.ome\.tiff + required: True + description: OME-TIFF files (multichannel, multi-layered) produced by the microscopy experiment. If compressed, must use loss-less compression algorithm. For Visium this stitched file should only include the single capture area relevant to the current dataset. For GeoMx there will be one OME TIFF file per slide, with each slide including multiple AOIs. See the following link for the set of fields that are required in the OME TIFF file XML header. 
+ is_qa_qc: False + example: lab_processed/images/HBM892.MDXS.293.ome.tiff + - + pattern: lab_processed\/images\/[^\/]*ome-tiff\.channels\.csv + required: True + description: This file provides essential documentation pertaining to each channel of the accompanying OME TIFF. The file should contain one row per OME TIFF channel. The required fields are detailed + is_qa_qc: False + - + pattern: lab_processed\/images\/[^\/]+\.json + required: False + description: This file is the output from LoupeBrowser, when a data provider manually denotes which spots on the slide contain tissue. This file is optionally used by 10X SpaceRanger. + - + pattern: lab_processed\/transformations\/.* + required: False + description: This directory contains transformation matrices that capture how each modality is aligned with the other and can be used to visualize overlays of multimodal data. This is needed to overlay images from the exact same tissue section (e.g., MALDI imaging mass spec, autofluorescence microscopy, MxIF, histological stains). In these cases data type may have different pixel sizes and slightly different orientations (i.e., one may be rotated relative to another). + - + pattern: lab_processed\/transformations\/[^\/]+\.txt + required: False + description: Transformation matrices used to overlay images from the exact same tissue section (e.g., MALDI imaging mass spec, autofluorescence microscopy, MxIF, histological stains). 
+ is_qa_qc: False diff --git a/src/ingest_validation_tools/table-schemas/assays/celldive-v2.yaml b/src/ingest_validation_tools/table-schemas/assays/celldive-v2.yaml index db5b484fe..3938ef01e 100644 --- a/src/ingest_validation_tools/table-schemas/assays/celldive-v2.yaml +++ b/src/ingest_validation_tools/table-schemas/assays/celldive-v2.yaml @@ -1,4 +1,3 @@ -exclude_from_index: True description_md: '[This link](https://docs.google.com/spreadsheets/d/1YnmdTAA0Z9MKN3OjR3Sca8pz-LNQll91wdQoRPSP6Q4/edit#gid=0) lists the set of fields that are required in the OME TIFF file XML header.' fields: - name: is_cedar diff --git a/src/ingest_validation_tools/table-schemas/assays/merfish-v2.yaml b/src/ingest_validation_tools/table-schemas/assays/merfish-v2.yaml index 6288083c4..7175fe65a 100644 --- a/src/ingest_validation_tools/table-schemas/assays/merfish-v2.yaml +++ b/src/ingest_validation_tools/table-schemas/assays/merfish-v2.yaml @@ -1,21 +1,19 @@ -exclude_from_index: True description_md: '[This link](https://docs.google.com/spreadsheets/d/1YnmdTAA0Z9MKN3OjR3Sca8pz-LNQll91wdQoRPSP6Q4/edit#gid=0) lists the set of fields that are required in the OME TIFF file XML header.' 
-draft: true fields: -- name: is_cedar - description: 'Identifies whether the version is hosted by CEDAR' - example: '' -- name: assay_category - constraints: - enum: - - fish -- name: assay_type - constraints: - enum: - - MERFISH -- name: parent_id - description: "UUID or HuBMAP ID of parent" - example: "ec2ccf5a3436b11d0c7186b74ad4673d" - custom_constraints: - url: - prefix: "https://entity.api.hubmapconsortium.org/entities/" \ No newline at end of file + - name: is_cedar + description: 'Identifies whether the version is hosted by CEDAR' + example: 'https://openview.metadatacenter.org/templates/https:%2F%2Frepo.metadatacenter.org%2Ftemplates%2Ff1ef260f-d4a3-43db-a739-49b394aeee20' + - name: assay_category + constraints: + enum: + - fish + - name: assay_type + constraints: + enum: + - MERFISH + - name: parent_id + description: "UUID or HuBMAP ID of parent" + example: "ec2ccf5a3436b11d0c7186b74ad4673d" + custom_constraints: + url: + prefix: "https://entity.api.hubmapconsortium.org/entities/" \ No newline at end of file diff --git a/src/ingest_validation_tools/table-schemas/assays/phenocycler-v2.yaml b/src/ingest_validation_tools/table-schemas/assays/phenocycler-v2.yaml index 343588b0e..cdb781e6d 100644 --- a/src/ingest_validation_tools/table-schemas/assays/phenocycler-v2.yaml +++ b/src/ingest_validation_tools/table-schemas/assays/phenocycler-v2.yaml @@ -1,4 +1,3 @@ -description_md: '[This link](https://docs.google.com/spreadsheets/d/1YnmdTAA0Z9MKN3OjR3Sca8pz-LNQll91wdQoRPSP6Q4/edit#gid=0) lists the set of fields that are required in the OME TIFF file XML header.' fields: - name: is_cedar description: 'Identifies whether the version is hosted by CEDAR'