Skip to content

Commit

Permalink
Merge pull request #7 from jhuapl-bio/prerelease
Browse files Browse the repository at this point in the history
V1.1.0 release
  • Loading branch information
Merritt-Brian authored Nov 12, 2021
2 parents 0409040 + 4628839 commit 58de2c1
Show file tree
Hide file tree
Showing 11 changed files with 4,432 additions and 131 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,8 @@

data/*
databases/*
test-data/*
!test-data/BC0*.fastq
!test-data/sample.fastq
tmp/*
taxdump
49 changes: 33 additions & 16 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,42 +6,59 @@ WORKDIR /opt/software
SHELL ["/bin/bash", "--login", "-c"]

# install apt dependencies and update conda
RUN apt-get update && apt-get install git -y \
RUN apt-get --allow-releaseinfo-change update && apt-get install git -y \
&& apt-get install -y apt-transport-https ca-certificates wget unzip bzip2 libfontconfig1 \
&& update-ca-certificates \
&& apt-get -qq -y remove curl \
&& apt-get install -y g++ gcc \
&& apt-get -qq -y autoremove \
&& apt-get autoclean \
&& rm -rf /var/lib/apt/lists/* /var/log/dpkg.log

# Set up conda environment
ENV PATH /opt/conda/bin:$PATH
RUN conda config --set ssl_verify no
COPY ./environment.yml /opt/environment.yml

RUN conda env create -f /opt/environment.yml

COPY databases /opt/databases
SHELL ["conda", "run", "-n", "mytax", "/bin/bash", "-c"]




# Define databases that are shipped with the image
# COPY databases /opt/databases

WORKDIR /opt/databases
RUN wget http://ccb.jhu.edu/software/kraken/dl/minikraken_20171019_4GB.tgz && \
tar -xvzf minikraken_20171019_4GB.tgz && \
mkdir -p /opt/databases && \
mv minikraken_20171013_4GB /opt/databases/minikraken && \
rm minikraken_20171019_4GB.tgz
# RUN find /opt/databases -name "*tar.gz" -exec tar -xvzf {} \;

RUN mkdir -p /opt/databases && \
wget ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/old/minikraken2_v2_8GB_201904.tgz -O /opt/databases/minikraken2.tar.gz && \
tar -xvzf minikraken2.tar.gz && rm -rf /opt/databases/minikraken2.tar.gz

WORKDIR /opt/software/mytax
COPY src /opt/software/mytax
COPY src/*.sh /opt/software/mytax/
RUN find . -name "*.sh" | while read fn; do ln -s $PWD/$fn /usr/local/bin; done
RUN conda activate mytax && bash process_krakendb.sh -k /opt/databases/minikraken
# # Define Flukraken by building from source with jellyfish
RUN source /opt/conda/etc/profile.d/conda.sh && conda activate mytax && \
bash /opt/software/mytax/build_flukraken.sh -k /opt/databases/flukraken2

## Get Centrifuge database
# RUN wget https://genome-idx.s3.amazonaws.com/centrifuge/p_compressed%2Bh%2Bv.tar.gz -O /opt/databases/centrifuge.tgz && \
# mkdir -p /opt/databases/centrifuge && tar -xvzf /opt/databases/centrifuge.tgz --directory /opt/databases/centrifuge/


# Install Centrifuge
COPY ./install.sh /opt/install.sh
RUN bash /opt/install.sh


# RUN conda activate mytax && \
# bash build_flukraken.sh -k flukraken && \
# rm -r flukraken/library flukraken/raw flukraken/database.jdb* && \
# tar c flukraken | gzip -c | tee flukraken.tar.gz && \
# rm -rf flukraken
COPY src/*.py /opt/software/mytax/

COPY sunburst /opt/software/mytax/sunburst
RUN python3 /opt/software/mytax/generate_hierarchy.py -o /taxdump -download --report sample.report
RUN rm -rf /tmp/*
WORKDIR /opt/software/mytax/

# ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "mytax", "bash", "-c"]


105 changes: 105 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,111 @@ The single script `build_flukraken.sh` functions as an outer wrapper for the inf
process_krakendb.sh -> post-processes database for visualization pipeline (not included in this repo yet)
```

## Running process script on kraken/kraken2 report and outfiles

### If running from Docker

docker build . -t jhuaplbio/mytax

Unix

`docker container run -it --rm -v $PWD:/data jhuaplbio/mytax bash`

Windows Powershell

`docker container run -it --rm -v $pwd:/data jhuaplbio/mytax bash`

```
# Run the installation script
# Activate the env, this will contain kraken2 and centrifuge scripts to build the database if needed as well as kraken2 and centrifuge dependencies
conda activate mytax
## Let's make a sample.fastq from test-data
# Kraken1 and Centrifuge must be compiled from scratch, due to issues with libraries/binaries on compilation and execution
bash install.sh
# first, download ncbi taxdump
python3 src/generate_hierarchy.py -o $PWD/taxdump --report test-data/sample.report -download
rm taxdump.tar.gz
# create the kraken output first, (report and outfile)
## DEPRECATED Kraken 1
mkdir -p databases/minikraken1
wget https://ccb.jhu.edu/software/kraken/dl/minikraken_20171019_4GB.tgz -O databases/minikraken1.tgz
tar -xvzf databases/minikraken1.tgz --directory databases/
export kraken1db=databases/minikraken_20171013_4GB && \
kraken --db $kraken1db --output test-data/sample.out test-data/sample.fastq && \
kraken-report --db $kraken1db test-data/sample.out | tee test-data/sample.report
## Also, use kraken 2
## export kraken2db to env variable
### IF you've made flukraken2 in tmp or....
export KRAKEN2_DEFAULT_DB="tmp/flukraken2"
### IF you have a pre-made minikraken/other kraken db ready
export KRAKEN2_DEFAULT_DB="databases/minikraken2_v2_8GB_201904_UPDATE" && \
kraken2 --output test-data/sample.out --report test-data/sample.report test-data/sample.fastq
### if you need minikraken2
mkdir -p databases/
wget ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/old/minikraken2_v2_8GB_201904.tgz -O databases/minikraken2.tgz
tar -xvzf databases/minikraken2.tgz --directory databases/
## Use Centrifuge
### Install centrifuge
bash install.sh
### Set up centrifuge env
mkdir -p databases/centrifuge
wget https://genome-idx.s3.amazonaws.com/centrifuge/p_compressed%2Bh%2Bv.tar.gz -O databases/centrifuge.tgz
tar -xvzf databases/centrifuge.tgz --directory databases/centrifuge/
export centrifugedb=databases/ # example
## run classify
## If you need to make a new database, see here: $CONDA_PREFIX/lib/centrifuge/centrifuge-build --taxonomy-tree taxdump/nodes.dmp --name-table taxdump/names.dmp sample.fastq sample
$CONDA_PREFIX/lib/centrifuge/centrifuge -f -x databases/centrifuge/p_compressed+h+v -q test-data/sample.fastq --report test-data/sample.centrifuge.report > test-data/sample.out
$CONDA_PREFIX/lib/centrifuge/centrifuge-kreport -x databases/centrifuge/p_compressed+h+v test-data/sample.centrifuge.report > test-data/sample.report
# Next, generate the hierarchy json file
python3 src/generate_hierarchy.py \
-o $PWD/test-data/sample.fullstring \
--report test-data/sample.report \
-taxdump taxdump/nodes.dmp
#Get the json for mytax sunburst plot
bash krakenreport2json.sh -i test-data/sample.fullstring -o test-data/sample.json
```

The resulting file can then be imported into the sunburst plot at `sunburst/index.html`, served with a simple HTTP server such as Python's `http.server`.

# License and copyright

Copyright (c) 2019 Thomas Mehoke
Expand Down
17 changes: 6 additions & 11 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,13 @@ name: mytax
channels:
- conda-forge
- bioconda
- anaconda
- defaults
- anaconda
- r
dependencies:
- gawk=5.1.0
- gettext=0.19.8.1
- jellyfish=1.1.12
- libcxx=10.0.0
- libffi=3.3
- libiconv=1.16
- kraken2=2.1.2
- kmer-jellyfish=1.1.12
- make=4.3
- ncurses=6.2
- perl=5.26.2
- readline=8.0
- kraken=1.1
- centrifuge=1.0.3
- pandas=1.3.4
- anytree=2.8.0
23 changes: 23 additions & 0 deletions install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@

# conda env create -f environment.yml


## Make sure that mytax env is activated first!
# # Install kraken1
# rm -rf $CONDA_PREFIX/lib/kraken1
# git clone https://github.com/DerrickWood/kraken.git $CONDA_PREFIX/lib/kraken1
# cd $CONDA_PREFIX/lib/kraken1
# echo $PWD
# bash install_kraken.sh $CONDA_PREFIX/bin

# find envs -name "*.yml" -exec conda create -f {} \;

# Build and install Centrifuge from source into the active conda environment.
#
# If you have a Mac, make sure Xcode is updated and installed. If windows, use
# cygwin or install in a docker container.
#
# Requires: the mytax conda env to be activated, so that CONDA_PREFIX is set.
# Exit status: non-zero when both the Makefile install and the fallback copy
# fail, or when the final chmod fails.

# Abort early when no conda env is active; with an empty CONDA_PREFIX every
# path below would resolve to /lib/centrifuge and /bin instead of the env.
: "${CONDA_PREFIX:?CONDA_PREFIX is not set; activate the mytax conda env first}"

centrifuge_src="${CONDA_PREFIX}/lib/centrifuge"

# try: clone, build and install through the project's Makefile.
if ! {
  git clone https://github.com/infphilo/centrifuge "${centrifuge_src}" \
    && make -C "${centrifuge_src}" \
    && make install -C "${centrifuge_src}" prefix="${CONDA_PREFIX}"
}; then
  # catch: any step above failed (e.g. the clone target already exists from a
  # previous run), so copy whatever centrifuge-* files are present in the
  # source directory straight into the env's bin directory.
  cp "${centrifuge_src}"/centrifuge-* "${CONDA_PREFIX}/bin/" || exit 1
fi

# Runs after either path succeeds: make sure the installed files are executable.
chmod +x "${CONDA_PREFIX}"/bin/centrifuge*



Loading

0 comments on commit 58de2c1

Please sign in to comment.