This repository has been archived by the owner on Jul 8, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
.shed.yml
160 lines (155 loc) · 10.6 KB
/
.shed.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# Repository-wide metadata for this .shed.yml.
# NOTE(review): presumably consumed by planemo when publishing to a Galaxy
# Tool Shed — confirm against the planemo documentation.
owner: "trinity_ctat"
remote_repository_url: "https://github.com/NCIP/Trinity_CTAT_galaxy"
homepage_url: "https://github.com/NCIP/Trinity_CTAT_galaxy/wiki"

# Categories applied to the entries defined in this file.
categories:
  - "Assembly"
  - "RNA"
  - "Sequence Analysis"
# One entry per repository. Every entry carries a short `description`, a
# `long_description`, and an `include` list of the files that belong to it.
# Entries must stay nested under `repositories:`; at column 0 they would be
# read as unrelated top-level keys with duplicate description/include keys.
# NOTE(review): `include` semantics (files packaged into the repository) are
# assumed from planemo's .shed.yml conventions — confirm.
repositories:
  ctat_clean_headers:
    description: "Removes whitespace from the header of each selected fastq file."
    long_description: "Removes whitespace from the header of each selected fastq file. If your Trinity run gives you errors with dying threads during the normalization step, try this tool on each input first."
    include:
      - ctat_clean_headers.xml

  ctat_concatenate:
    description: "Concatenates multiple files to one file."
    long_description: "A concatenate tool is included in this suite for completeness purposes. Concatenates multiple files to one file."
    include:
      - ctat_concatenate.xml
      - ctat_bash_command_executer.py
      - test-data/Sp_ds.left.fq
      - test-data/Sp_ds.right.fq
      - test-data/Sp_hs.left.fq
      - test-data/Sp_hs.right.fq

  ctat_trinity_rnaseq:
    description: "Trinity assembles transcript sequences from Illumina RNA-Seq data."
    long_description: "Trinity, developed at the Broad Institute and the [Hebrew University of Jerusalem] (http://www.cs.huji.ac.il), represents a novel method for the efficient and robust de novo reconstruction of transcriptomes from RNA-seq data. Trinity combines three independent software modules: Inchworm, Chrysalis, and Butterfly, applied sequentially to process large volumes of RNA-seq reads. Trinity partitions the sequence data into many individual de Bruijn graphs, each representing the transcriptional complexity at a given gene or locus, and then processes each graph independently to extract full-length splicing isoforms and to tease apart transcripts derived from paralogous genes."
    include:
      - ctat_trinity_rnaseq.xml
      - ctat_trinity_wrapper.py
      - test-data/reads.left.simPE.fq
      - test-data/reads.right.simPE.fq
      - test-data/Sp.cat_ds_hs.left.fq
      - test-data/Sp.cat_ds_hs.right.fq

  ctat_rsem_align_and_estimate_abundance:
    description: "Align and Estimate Abundance generates transcript quantification for genes and isoforms using RSEM."
    long_description: "Align and Estimate Abundance generates transcript quantification for genes and isoforms using RSEM. RSEM enables accurate transcript quantification for species without sequenced genomes."
    include:
      - ctat_rsem_align_and_estimate_abundance.xml
      - ctat_trinity_tool_wrapper.py
      - test-data/reads.simPE.Trinity.fasta
      - test-data/reads.left.simPE.fq
      - test-data/reads.right.simPE.fq
      - test-data/Sp.Trinity.fasta
      - test-data/Sp_hs.left.fq
      - test-data/Sp_hs.right.fq

  ctat_abundance_estimation_to_matrix:
    description: "Abundance estimation to matrix joins RSEM-computed gene or isoform fragment counts into a matrix file."
    long_description: "Abundance estimation to matrix joins RSEM-computed gene or isoform fragment counts into a matrix file."
    include:
      - ctat_abundance_estimation_to_matrix.xml
      - ctat_abundance_estimation_to_matrix.py
      - ctat_trinity_tool_wrapper.py
      - test-data/Sp_ds.RSEM.genes.results
      - test-data/Sp_hs.RSEM.genes.results

  ctat_edger_differential_expression:
    description: "EdgeR differential expression identifies differentially expressed transcripts."
    long_description: "EdgeR differential expression uses the counts_matrix from abundance_estimation_to_matrix to identify differentially expressed transcripts."
    include:
      - ctat_edger_differential_expression.xml
      - ctat_edger_differential_expression.py
      - test-data/Sp.counts.matrix
      - test-data/Sp.Trinity.fasta

  ctat_analyze_differential_expression:
    description: "Analyze differential expression creates analysis files from the output from EdgeR differential expression."
    long_description: "Analyze differential expression creates analysis files from the output from EdgeR differential expression."
    include:
      - ctat_analyze_differential_expression.xml
      - ctat_analyze_differential_expression.py
      - test-data/Sp.edgeR.tar.gz
      - test-data/Sp.TMM.EXPR.matrix

  ctat_discasm:
    description: "DISCASM extracts reads that map to reference genomes in a discordant fashion and performs a de novo transcriptome assembly of these reads"
    long_description: "DISCASM aims to extract reads that map to reference genomes in a discordant fashion and optionally include reads that do not map to the genome at all, and perform a de novo transcriptome assembly of these reads. DISCASM relies on the output from STAR (as run via STAR-Fusion), and supports de novo transcriptome assembly using Trinity or Oases. - https://github.com/DISCASM/DISCASM/wiki"
    include:
      - ctat_discasm.xml
      - test-data/DISCASM/SF2/reads_1.fq.gz
      - test-data/DISCASM/SF2/reads_2.fq.gz
      - test-data/DISCASM/SF2/Aligned.sortedByCoord.out.bam
      - test-data/DISCASM/SF2/Chimeric.out.junction
      - test-data/DISCASM/reads_1_2.oases.transcripts.fa
      - test-data/DISCASM/SF1/reads.left.simPE.fq
      - test-data/DISCASM/SF1/reads.right.simPE.fq
      - test-data/DISCASM/SF1/SF_out_aligned.bam
      - test-data/DISCASM/SF1/SF_out_chimeric.junction
      - test-data/DISCASM/reads.simPE.oases.transcripts.fa

  ctat_gmap_fusion:
    description: "GMAP-fusion is a utility for identifying candidate fusion transcripts."
    long_description: "GMAP-fusion is a utility for identifying candidate fusion transcripts based on transcript sequences reconstructed via RNA-Seq de novo transcriptome assembly. - https://github.com/GMAP-fusion/GMAP-fusion/wiki"
    include:
      - ctat_gmap_fusion.xml
      - tool_data_table_conf.xml.sample
      - tool-data/ctat_genome_resource_libs.loc.sample
      - test-data/GMAP/reads_1.fq.gz
      - test-data/GMAP/reads_2.fq.gz
      - test-data/GMAP/transcripts.fa
      - test-data/GMAP/fusion.reads_1_2.final

  ctat_fusion_inspector:
    description: "FusionInspector performs a supervised analysis of fusion predictions, attempting to recover and re-score evidence for such predictions."
    long_description: "FusionInspector is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). FusionInspector assists in fusion transcript discovery by performing a supervised analysis of fusion predictions, attempting to recover and re-score evidence for such predictions. Given a list of candidate fusion genes (as derived from running any fusion transcript prediction tool, such as Prada, FusionCatcher, SoapFuse, TophatFusion, DISCASM/GMAP-Fusion, STAR-Fusion, or other), FusionInspector extracts the genomic regions for the fusion partners and constructs mini-fusion-contigs containing the pairs of genes in their proposed fused orientation. - https://github.com/FusionInspector/FusionInspector/wiki"
    include:
      - ctat_fusion_inspector.xml
      - tool_data_table_conf.xml.sample
      - tool-data/ctat_genome_resource_libs.loc.sample
      - test-data/FusionInspector/fusion_targets.A.txt
      - test-data/FusionInspector/fusion_targets.B.txt
      - test-data/FusionInspector/fusion_targets.C.txt
      - test-data/FusionInspector/test.reads_1.fastq.gz
      - test-data/FusionInspector/test.reads_2.fastq.gz

  ctat_star_fusion:
    description: "STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads."
    long_description: "STAR-Fusion is a component of the Trinity Cancer Transcriptome Analysis Toolkit (CTAT). STAR-Fusion uses the STAR aligner to identify candidate fusion transcripts supported by Illumina reads. STAR-Fusion further processes the output generated by the STAR aligner to map junction reads and spanning reads to a reference annotation set. - https://github.com/STAR-Fusion/STAR-Fusion/wiki"
    include:
      - ctat_star_fusion.xml
      - tool_data_table_conf.xml.sample
      - tool-data/ctat_genome_resource_libs.loc.sample
      - test-data/StarFusion/reads_1.fq.gz
      - test-data/StarFusion/reads_2.fq.gz

  ctat_metagenomics:
    description: "Classifier for metagenomic sequences (RNA-Seq)"
    long_description: "Classifier for metagenomic sequences (RNA-Seq). For foreign transcript detection, we leverage Centrifuge and Kraken, leveraging RNA-Seq reads and Trinity-reconstructed transcripts. Our efforts here are being carried out in collaboration with the group of Steven Salzberg at JHU."
    include:
      - ctat_metagenomics.xml
      - tool_data_table_conf.xml.sample
      - tool-data/ctat_centrifuge_indexes.loc.sample
      - test-data/centrifuge/SRR2219890_1_adj_20k_reads.fastq
      - test-data/centrifuge/SRR2219890_1_20k_reads.classification.report.txt
      - test-data/centrifuge/SRR2219890_1_20k_reads.kraken_style_report.txt

  ctat_lncrna:
    description: "lncRNA discovery from RNA-Seq data"
    long_description: "This tool uses slncky for lncRNA discovery from RNA-Seq data. slncky filters a high-quality set of noncoding transcripts, discovers lncRNA orthologs, and characterizes conserved lncRNA evolution."
    include:
      - ctat_lncrna.xml
      - tool_data_table_conf.xml.sample
      - tool-data/ctat_lncrna_annotations.loc.sample
      - test-data/slncky/reads.simPE.browse.html
      - test-data/slncky/reads.simPE.canonical_to_lncs.txt
      - test-data/slncky/reads.simPE.canonical_to_lncs.txt.sorted
      - test-data/slncky/reads.simPE.cluster_info.txt
      - test-data/slncky/reads.simPE.cluster_info.txt.sorted
      - test-data/slncky/reads.simPE.filtered_info.txt
      - test-data/slncky/reads.simPE.filtered_info.txt.sorted
      - test-data/slncky/reads.simPE.lncs.bed
      - test-data/slncky/reads.simPE.lncs.bed.sorted
      - test-data/slncky/reads.simPE.lncs.info.txt
      - test-data/slncky/reads.simPE.lncs.info.txt.sorted
      - test-data/slncky/reads.simPE.orfs.txt
      - test-data/slncky/reads.simPE.orthologs.top.txt
      - test-data/slncky/reads.simPE.orthologs.top.txt.sorted
      - test-data/slncky/reads.simPE.orthologs.txt
      - test-data/slncky/reads.simPE.orthologs.txt.sorted
# Suite-level entry. Its fields must be nested under `suite:`; at column 0
# `suite` would be null and the fields would become top-level keys.
# NOTE(review): presumably describes the umbrella suite repository that groups
# the repositories in this file — confirm against the planemo documentation.
suite:
  name: "suite_ctat_tools"
  # Quoted so every loader keeps the version as a string.
  version: "1.0.0"
  description: "These tools were developed by The Broad Institute as the Cancer Transcriptome Analysis Toolkit (CTAT)."
  long_description: "The Cancer Transcriptome Analysis Toolkit (CTAT) aims to provide tools for leveraging RNA-Seq to gain insights into the biology of cancer transcriptomes. Bioinformatics tool support is provided for mutation detection, fusion transcript identification, de novo transcript assembly of cancer-specific transcripts, lincRNA classification, and foreign transcript detection (viruses, microbes). CTAT is funded by the National Cancer Institute Informatics Technology for Cancer Research (NCI ITCR) program."