From 76936fe74687746d156bcffeb286473b2d746663 Mon Sep 17 00:00:00 2001 From: jpuerto-psc <68066250+jpuerto-psc@users.noreply.github.com> Date: Tue, 7 May 2024 11:07:18 -0400 Subject: [PATCH] Jpuerto/donor yaml update (#1329) * Docs: Add donor files back to YAML * Docs: Remove murine-source fields * Docs: Remove murine-source fields --------- Co-authored-by: Juan Puerto <=> --- CHANGELOG.md | 1 + docs/field-descriptions.yaml | 63 +++++++++++++++++------------------- docs/field-entities.yaml | 60 ++++++++++++++-------------------- docs/field-schemas.yaml | 60 ++++++++++++++-------------------- docs/field-types.yaml | 28 +++++++--------- 5 files changed, 91 insertions(+), 121 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f60a089ac..c8a9beafe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ - Bugfix stripping trailing slash in ingest api url - Converted upload `_url_checks` to use `_get_method` for SenNet compatibility - Add CEDAR template for murine-source +- Add donor field descriptions back, remove murine-source descriptions ## v0.0.17 diff --git a/docs/field-descriptions.yaml b/docs/field-descriptions.yaml index 86f429243..06c01bca5 100644 --- a/docs/field-descriptions.yaml +++ b/docs/field-descriptions.yaml @@ -24,6 +24,8 @@ acquisition_matrix_size_in_frequency_encoding_direction: Dimensions of the acqui acquisition_matrix_size_in_phase_encoding_direction: Dimensions of the acquired phase data before reconstruction, per image. affiliation: Institutional affiliation +age_unit: Unit for age measurement. +age_value: The time elapsed since birth. analyte_class: Analytes are the target molecules being measured with the assay. antibodies_path: Relative path to file with antibody information for this dataset. antibody_name: Anti-(target name) antibody. Not validated or used down-stream. @@ -40,7 +42,10 @@ assay_type: The specific type of assay being executed. 
bead_barcode_offset: Position(s) in the read at which the bead barcode starts bead_barcode_read: Which read file contains the bead barcode bead_barcode_size: Length of the bead barcode in base pairs -bedding: The type of cage bedding in the cage where the source is housed. +blood_type: ABO blood type or "serotype" refers to the presence/absence of either or both of the + A & B blood antigens. +body_mass_index_value: An individual's weight in kilograms divided by the square of + the height in meters. bulk_atac_cell_isolation_protocols_io_doi: 'Link to a protocols document answering the question: How was tissue stored and processed for cell/nuclei isolation' bulk_rna_isolation_protocols_io_doi: 'Link to a protocols document answering the question: @@ -53,7 +58,7 @@ bulk_rna_yield_value: 'RNA (ng) per Weight of Tissue (mg). Answer the question: RNA? Calculate the yield by dividing total RNA isolated by amount of tissue used to isolate RNA from (ng/mg).' bulk_transposition_input_number_nuclei: A number (no comma separators) -cage_enhancements: "Environmental enrichments present in the source\u2019s cage." +cause_of_death: The circumstance or condition that caused death. ce_background_electrolyte: Chemical composition of the background electrolyte that fills the separation capillary (e.g. "3% acetic acid"). ce_capillary_coating: Treatment of surface of separation capillary. Capillary coating @@ -108,16 +113,11 @@ data_collection_mode: Mode of data collection in tandem MS assays. Either DDA (D data_path: Relative path to file or directory with instrument data. Downstream processing will depend on filename extension conventions. data_precision_bytes: Numerical data precision in bytes -date_of_birth_or_fertilization: The date when the mouse/embryo was born/fertilized. - If the hours/minutes are not known, use '00:00'. -date_of_death: The date when the mouse/embryo died. If the hours/minutes are not known, - use '00:00'. description: Free-text description of this assay. 
desi_solvent: Solvent composition for conducting nanospray desorption electrospray ionization (nanoDESI) or desorption electrospray ionization (DESI). desi_solvent_flow_rate: The rate of flow of the solvent into a spray. desi_solvent_flow_rate_unit: Units of the rate of solvent flow. -diet: A free text description of the source's diet. dilution: Antibody solutions may be diluted according to the experimental protocol. dms: Was differential mobility spectrometry used in this assay? dna_assay_input_unit: Units of DNA input into library preparation @@ -132,7 +132,6 @@ echo_time: 'Time in msec between the middle of the excitation pulse and the peak to cover the center of k-space (i.e., -kx=0, ky=0). ' echo_train_length: 'Number of lines in k-space acquired per excitation per image. ' end_datetime: Time stamp indicating end of ablation for ROI -euthanization_method: If the source was euthanized, select the method of euthanization. execution_datetime: Start date and time of assay, typically a date-time stamped folder generated by the acquisition instrument. YYYY-MM-DD hh:mm, where YYYY is the year, MM is the month with leading 0s, and DD is the day with leading 0s, hh is the hour @@ -160,6 +159,9 @@ health_status: "Patient's baseline physical condition prior to immediate event l \ healthy subject may have experienced trauma leading to brain death. As a result\ \ of organ donation, a sample is collected. In this scenario, the subject is deemed\ \ \u201Crelatively healthy.\u201D" +height_unit: Unit for height measurement. +height_value: The vertical measurement or distance from the base to the top of a subject + or participant. 
histological_report: histopathological reporting of key variables that are important for the tissue (absence of necrosis, comment on composition, significant pathology description, high level inflammation/fibrosis assessment etc @@ -178,11 +180,21 @@ ion_mobility: 'Specifies whether or not ion mobility spectrometry was performed ion_source: Specifies the ion source used is_cedar: Identifies whether the version is hosted by CEDAR is_contact: Is this individual a contact for DOI purposes? -is_deceased: Is the source deceased? Use either 'True' or 'False'. -is_embryo: Is the source an embryo? Use either 'True' or 'False'. is_targeted: Specifies whether or not a specific molecule(s) is/are targeted for detection/measurement by the assay. is_technical_replicate: Is this a sequencing replicate? +kidney_donor_profile_index_value: 'The Kidney Donor Profile Index (KDPI) is a numerical + measure that combines ten donor factors, including clinical parameters and demographics, + to summarize into a single number the quality of deceased donor kidneys relative + to other recovered kidneys. The KDPI is derived by first calculating the Kidney Donor + Risk Index (KDRI) for a deceased donor. Kidneys from a donor with a KDPI of 90%, + for example, have a KDRI (which indicates relative risk of graft failure) greater + than 90% of recovered kidneys. The KDPI is simply a mapping of the KDRI from a relative + risk scale to a cumulative percentage scale. The reference population used for this + mapping is all deceased donors in the United States with a kidney recovered for + the purpose of transplantation in the prior calendar year. Lower KDPI values are + associated with increased donor quality and expected longevity. https://optn.transplant.hrsa.gov/media/1512/guide_to_calculating_interpreting_kdpi.pdf + ' lab_id: "An internal field labs can use it to add whatever ID(s) they want or need\ \ for dataset validation and tracking. 
This could be a single ID (e.g., \"Visium_9OLC_A4_S1\"\ ) or a delimited list of IDs (e.g., \u201C9OL; 9OLC.A2; Visium_9OLC_A4_S1\u201D\ @@ -235,14 +247,6 @@ library_layout: Whether the library was generated for single-end or paired end s library_pcr_cycles: Number of PCR cycles to amplify cDNA library_pcr_cycles_for_sample_index: Number of PCR cycles performed for library indexing library_preparation_kit: Reagent kit used for library preparation -light_cycle: The light cycle in the room where the source is housed. "Standard/default" - refers to 12-hour photoperiods (e.g., lights on at 7:00 AM, lights off at 7:00 PM). - "Longer photoperiods" refers to 14-hour photoperiods (e.g., lights on at 7:00 AM, - lights off at 9:00 PM). "Reverse lightcycles" means that the the timing of the 12-hour - photoperiod is reversed (.e.g, lights on at 7:00 PM, lights off at 7:00 AM). -local_lifespan_data: A free text description of how long mice live within the local - environment. It is recommended to provide the median or maximum values for murine - lifespans. lot_number: 'The lot# is specific to the vendor. (eg: Abcam lot# GR3238979-1)' mass_resolving_power: "The MS1 resolving power defined as m/\u2206m where \u2206m\ \ is the FWHM for a given peak with a specified m/z (m). (unitless)" @@ -250,6 +254,10 @@ max_x_width_unit: Units of image width of the ROI acquisition max_x_width_value: Image width value of the ROI acquisition max_y_height_unit: Units of image height of the ROI acquisition max_y_height_value: Image height value of the ROI acquisition +mechanism_of_injury: 'Mechanism of injury may be, for example: fall, impact (eg: auto + accident), weapon (eg: firearm), etc.' +medical_history: A record of a patient's background regarding health and the occurrence + of disease events of the individual. middle_name_or_initial: Middle name or initial ms_scan_mode: Indicates whether the data were generated using MS, MS/MS or MS3. 
ms_source: The ion source type used for surface sampling. @@ -352,7 +360,8 @@ quality_criteria: 'For example, RIN: 8.7. For suspensions, measured by visual in or no cells. This can be captured at a high level. "OK" or "not OK", or with more specificity such as "debris", "clump", "low clump".' quality_view: The quality of the acquired ultrasound images. -rack_setup: The rack setup type in which the source is housed. +race: A grouping of humans based on shared physical characteristics or social/ethnic + identity generally viewed as distinct. range_z_unit: The unit of range_z_value. range_z_value: The total range of the z axis. reagent_prep_protocols_io_doi: DOI for protocols.io referring to the protocol for @@ -374,10 +383,6 @@ roi_description: A description of the region of interest (ROI) captured in the i roi_id: Multiple images (1-n) are acquired from regions of interest (ROI1, ROI2, ROI3, etc) on a slide. The roi_id is a number from 1-n representing the ROI captured on a slide. -room_health_status: A description of the pathogen and opportunist exclusion level - of the room where the source is housed. -room_temperature: The temperature value in Celsius of the room where the source is - housed. An example is "23". rr_id: The rr_id is a unique antibody identifier that comes from the Antibody Registry (https://antibodyregistry.org). sample_id: UUID or HuBMAP ID of parent @@ -412,7 +417,7 @@ sequencing_read_format: Slash-delimited list of the number of sequencing cycles sequencing_read_percent_q30: 'Q30 is the weighted average of all the reads (e.g. # bases UMI * q30 UMI + # bases R2 * q30 R2 + ...)' sequencing_reagent_kit: Reagent kit used for sequencing -sex: The sex of the mouse. +sex: 'Biological sex at birth: male or female or other.' signal_type: Type of signal measured per channel (usually dual counts) single_file_export_format: 'The format in which each single imaging file will be exported. 
(Example: DICOM, tiff, avi, etc.)' @@ -420,7 +425,6 @@ sn_quality: 'An integer describing the signal to noise quality of an OCT image ( 30)' sn_quality_unit: 'The unit of the integer describing the signal to noise quality of an OCT image (Example: dB)' -source_id: SenNet ID of the source (whole organism) of the assayed tissue. source_project: External source (outside of HuBMAP) of the project, eg. HCA (The Human Cell Atlas Consortium). source_storage_time_unit: Time unit @@ -455,13 +459,6 @@ step_z_value: The number of optical sections in z axis range. storage_media: What was the sample preserved in. storage_method: The method by which the sample was stored, after preparation and before the assay was performed. -strain: Jackson Labs nomenclature. When mutant alleles are part of the strain name, - use "<" and ">" to indicate the superscripted alleles. For example, C57BL/6J-KitW-39J - should be entered as "C57BL/6J-Kit", where "W-39J" would be the portion of - the string displayed as superscripted text. For further information, see the "Quick - Guide to Mouse Nomenclature" (https://resources.jax.org/guides/quick-guide-to-mouse-nomenclature). -strain_rrid: The Research Resource Identifier (RRID) (https://scicrunch.org/resources/data/source/nlx_154697-1/search) - for the strain. An example is 'RRID:MGI:3713213' suspension_enriched: Was the cell/nuclei population enriched? suspension_enriched_target: If the suspension was enriched, then this is the target of the enrichment. @@ -500,8 +497,6 @@ warm_ischemic_time_value: 'Time interval from interruption of blood supply of ti to cooling to 4C: For organ donor: cessation of blood flow to perfusion of organ (cooled to 4C) For surgical specimen/biopsy: cessation of blood flow to specimen (time biopsy taken or blood supply is interrupted) to cooling of specimen to 4C.' -water_source: A free text description of the source's water supply, including any - treatments to the water. 
wavelength_unit: The unit of the wavelength value used to acquire OCT images (nm) wavelength_value: 'The value of the wavelength used to acquire OCT images (Example: 787)' diff --git a/docs/field-entities.yaml b/docs/field-entities.yaml index ee7fdae5b..2b19d1777 100644 --- a/docs/field-entities.yaml +++ b/docs/field-entities.yaml @@ -22,6 +22,10 @@ acquisition_matrix_size_in_phase_encoding_direction: - dataset affiliation: - contributors +age_unit: +- donor +age_value: +- donor analyte_class: - dataset antibodies_path: @@ -48,8 +52,10 @@ bead_barcode_read: - dataset bead_barcode_size: - dataset -bedding: -- murine +blood_type: +- donor +body_mass_index_value: +- donor bulk_atac_cell_isolation_protocols_io_doi: - dataset bulk_rna_isolation_protocols_io_doi: @@ -62,8 +68,8 @@ bulk_rna_yield_value: - dataset bulk_transposition_input_number_nuclei: - dataset -cage_enhancements: -- murine +cause_of_death: +- donor ce_background_electrolyte: - dataset ce_capillary_coating: @@ -128,10 +134,6 @@ data_path: - dataset data_precision_bytes: - dataset -date_of_birth_or_fertilization: -- murine -date_of_death: -- murine description: - dataset desi_solvent: @@ -140,8 +142,6 @@ desi_solvent_flow_rate: - dataset desi_solvent_flow_rate_unit: - dataset -diet: -- murine dilution: - antibodies dms: @@ -162,8 +162,6 @@ echo_train_length: - dataset end_datetime: - dataset -euthanization_method: -- murine execution_datetime: - dataset expected_cell_count: @@ -190,6 +188,10 @@ harmonics: - dataset health_status: - sample +height_unit: +- donor +height_value: +- donor histological_report: - sample imaging_threshold_unit_value: @@ -213,14 +215,12 @@ is_cedar: - sample is_contact: - contributors -is_deceased: -- murine -is_embryo: -- murine is_targeted: - dataset is_technical_replicate: - dataset +kidney_donor_profile_index_value: +- donor lab_id: - organ label_name: @@ -293,10 +293,6 @@ library_pcr_cycles_for_sample_index: - dataset library_preparation_kit: - dataset -light_cycle: -- 
murine -local_lifespan_data: -- murine lot_number: - antibodies mass_resolving_power: @@ -309,6 +305,10 @@ max_y_height_unit: - dataset max_y_height_value: - dataset +mechanism_of_injury: +- donor +medical_history: +- donor middle_name_or_initial: - contributors ms_scan_mode: @@ -455,8 +455,8 @@ quality_criteria: - sample quality_view: - dataset -rack_setup: -- murine +race: +- donor range_z_unit: - dataset range_z_value: @@ -489,10 +489,6 @@ roi_description: - dataset roi_id: - dataset -room_health_status: -- murine -room_temperature: -- murine rr_id: - antibodies sample_id: @@ -534,7 +530,7 @@ sequencing_read_percent_q30: sequencing_reagent_kit: - dataset sex: -- murine +- donor signal_type: - dataset single_file_export_format: @@ -543,8 +539,6 @@ sn_quality: - dataset sn_quality_unit: - dataset -source_id: -- murine source_project: - dataset source_storage_time_unit: @@ -577,10 +571,6 @@ storage_media: - sample storage_method: - sample -strain: -- murine -strain_rrid: -- murine suspension_enriched: - sample suspension_enriched_target: @@ -642,15 +632,15 @@ warm_ischemic_time_unit: - organ warm_ischemic_time_value: - organ -water_source: -- murine wavelength_unit: - dataset wavelength_value: - dataset weight_unit: +- donor - organ - sample weight_value: +- donor - sample diff --git a/docs/field-schemas.yaml b/docs/field-schemas.yaml index 8ba2c53bb..e494ead73 100644 --- a/docs/field-schemas.yaml +++ b/docs/field-schemas.yaml @@ -82,6 +82,10 @@ acquisition_matrix_size_in_phase_encoding_direction: - mri affiliation: - contributors +age_unit: +- donor +age_value: +- donor analyte_class: - af - bulkatacseq @@ -252,8 +256,10 @@ bead_barcode_read: - slideseq bead_barcode_size: - slideseq -bedding: -- murine-source +blood_type: +- donor +body_mass_index_value: +- donor bulk_atac_cell_isolation_protocols_io_doi: - bulkatacseq bulk_rna_isolation_protocols_io_doi: @@ -266,8 +272,8 @@ bulk_rna_yield_value: - bulkrnaseq bulk_transposition_input_number_nuclei: - bulkatacseq 
-cage_enhancements: -- murine-source +cause_of_death: +- donor ce_background_electrolyte: - cems ce_capillary_coating: @@ -395,10 +401,6 @@ data_path: data_precision_bytes: - imc - mibi -date_of_birth_or_fertilization: -- murine-source -date_of_death: -- murine-source description: - af - bodyct @@ -432,8 +434,6 @@ desi_solvent_flow_rate: - ims desi_solvent_flow_rate_unit: - ims -diet: -- murine-source dilution: - antibodies dms: @@ -482,8 +482,6 @@ echo_train_length: end_datetime: - imc - mibi -euthanization_method: -- murine-source execution_datetime: - af - bodyct @@ -538,6 +536,10 @@ harmonics: - ultrasound health_status: - sample +height_unit: +- donor +height_value: +- donor histological_report: - sample-block - sample-section @@ -613,10 +615,6 @@ is_cedar: - xenium is_contact: - contributors -is_deceased: -- murine-source -is_embryo: -- murine-source is_targeted: - af - bulkatacseq @@ -646,6 +644,8 @@ is_technical_replicate: - scrnaseq - scrnaseq-hca - slideseq +kidney_donor_profile_index_value: +- donor lab_id: - organ label_name: @@ -766,10 +766,6 @@ library_pcr_cycles_for_sample_index: - slideseq library_preparation_kit: - bulkatacseq -light_cycle: -- murine-source -local_lifespan_data: -- murine-source lot_number: - antibodies mass_resolving_power: @@ -813,6 +809,10 @@ max_y_height_value: - mri - oct - ultrasound +mechanism_of_injury: +- donor +medical_history: +- donor middle_name_or_initial: - contributors ms_scan_mode: @@ -1204,8 +1204,8 @@ quality_criteria: - sample-suspension quality_view: - ultrasound -rack_setup: -- murine-source +race: +- donor range_z_unit: - lightsheet range_z_value: @@ -1347,10 +1347,6 @@ roi_id: - mri - oct - ultrasound -room_health_status: -- murine-source -room_temperature: -- murine-source rr_id: - antibodies sample_id: @@ -1443,7 +1439,7 @@ sequencing_reagent_kit: - slideseq - wgs sex: -- murine-source +- donor signal_type: - imc - imc3d @@ -1458,8 +1454,6 @@ sn_quality: - oct sn_quality_unit: - oct -source_id: -- 
murine-source source_project: - scrnaseq-hca source_storage_time_unit: @@ -1507,10 +1501,6 @@ storage_method: - sample-block - sample-section - sample-suspension -strain: -- murine-source -strain_rrid: -- murine-source suspension_enriched: - sample-suspension suspension_enriched_target: @@ -1632,15 +1622,15 @@ warm_ischemic_time_unit: - organ warm_ischemic_time_value: - organ -water_source: -- murine-source wavelength_unit: - oct wavelength_value: - oct weight_unit: +- donor - organ - sample-block weight_value: +- donor - sample-block diff --git a/docs/field-types.yaml b/docs/field-types.yaml index 383cf41f9..b5a00bcd0 100644 --- a/docs/field-types.yaml +++ b/docs/field-types.yaml @@ -10,6 +10,8 @@ acquisition_instrument_vendor: string acquisition_matrix_size_in_frequency_encoding_direction: integer acquisition_matrix_size_in_phase_encoding_direction: integer affiliation: string +age_unit: string +age_value: number analyte_class: string antibodies_path: string antibody_name: string @@ -22,14 +24,15 @@ assay_type: string bead_barcode_offset: string bead_barcode_read: string bead_barcode_size: string -bedding: string +blood_type: string +body_mass_index_value: number bulk_atac_cell_isolation_protocols_io_doi: string bulk_rna_isolation_protocols_io_doi: string bulk_rna_isolation_quality_metric_value: number bulk_rna_yield_units_per_tissue_unit: string bulk_rna_yield_value: number bulk_transposition_input_number_nuclei: string -cage_enhancements: string +cause_of_death: string ce_background_electrolyte: string ce_capillary_coating: string ce_electroosmotic_flow: string @@ -62,13 +65,10 @@ current_value: number data_collection_mode: string data_path: string data_precision_bytes: number -date_of_birth_or_fertilization: datetime -date_of_death: datetime description: string desi_solvent: string desi_solvent_flow_rate: number desi_solvent_flow_rate_unit: string -diet: string dilution: string dms: boolean dna_assay_input_unit: string @@ -79,7 +79,6 @@ dynamic_contrast: 
integer echo_time: number echo_train_length: integer end_datetime: datetime -euthanization_method: string execution_datetime: datetime expected_cell_count: integer field_strength_unit: string @@ -93,6 +92,8 @@ gdna_fragmentation_quality_assurance: string guard_column: string harmonics: string health_status: string +height_unit: string +height_value: number histological_report: string imaging_threshold_unit_value: number imaging_threshold_value: number @@ -103,10 +104,9 @@ ion_mobility: string ion_source: string is_cedar: boolean is_contact: boolean -is_deceased: boolean -is_embryo: boolean is_targeted: boolean is_technical_replicate: boolean +kidney_donor_profile_index_value: number lab_id: string label_name: string labeling: string @@ -143,14 +143,14 @@ library_layout: string library_pcr_cycles: integer library_pcr_cycles_for_sample_index: integer library_preparation_kit: string -light_cycle: string -local_lifespan_data: string lot_number: string mass_resolving_power: number max_x_width_unit: string max_x_width_value: number max_y_height_unit: string max_y_height_value: number +mechanism_of_injury: string +medical_history: string middle_name_or_initial: string ms_scan_mode: string ms_source: string @@ -223,7 +223,7 @@ protocols_io_doi: string puck_id: string quality_criteria: string quality_view: string -rack_setup: string +race: string range_z_unit: string range_z_value: number reagent_prep_protocols_io_doi: string @@ -240,8 +240,6 @@ rnaseq_assay_input_value: number rnaseq_assay_method: string roi_description: string roi_id: integer -room_health_status: string -room_temperature: number rr_id: string sample_id: string sample_quality_metric: string @@ -267,7 +265,6 @@ signal_type: string single_file_export_format: string sn_quality: integer sn_quality_unit: string -source_id: string source_project: string source_storage_time_unit: string source_storage_time_value: number @@ -284,8 +281,6 @@ start_datetime: datetime step_z_value: number storage_media: string 
storage_method: string -strain: string -strain_rrid: string suspension_enriched: string suspension_enriched_target: string suspension_entity: string @@ -315,7 +310,6 @@ warm_ischemia_time_unit: string warm_ischemia_time_value: number warm_ischemic_time_unit: string warm_ischemic_time_value: number -water_source: string wavelength_unit: string wavelength_value: number weight_unit: string