-
Notifications
You must be signed in to change notification settings - Fork 0
/
06- decontam.Rmd
151 lines (126 loc) · 13 KB
/
06- decontam.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
---
title: "decontam"
author: "Marwa Tawfik"
summary: "NP_devStages_ampliseq"
Platform: "R version 4.1.0 (2021-05-18) -- Camp Pontanezen; x86_64-conda-linux-gnu (64-bit)"
date: "22 October 2022"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
# decontam
# load libraries ----
library(decontam); packageVersion("decontam")
# Warning message:
# package decontam was built under R version 4.1.1
# [1] ‘1.14.0’
```
```{r}
### Setting up
# inspection to get familiar with our input data before processing
sample_data(ps) # to see samples of phyloseq object
head(sample_data(ps)) # to view only first 6 of sample_data(ps)
sample_data(ps)$sample_or_control # to view the sample_or_control column
```
```{r}
### Inspect Library Sizes
df <- as.data.frame(sample_data(ps)) # Put sample_data into a ggplot-friendly data.frame (ggplot accept df)
df$LibrarySize <- sample_sums(ps)
df <- df[order(df$LibrarySize),]
df$Index <- seq(nrow(df))
ggplot(data=df, aes(x=Index, y=LibrarySize, color = sample)) + geom_point()
ggsave("figures/library_sampleVScontrol.tiff", height = 5, width = 10)
ggplot(data=df, aes(x=Index, y=LibrarySize, color = sample_or_control)) + geom_point()
ggsave("figures/library_sampleVScontrol1.tiff", height = 5, width = 10)
```
```{r}
### Identify Contaminants - Prevalence based method
# select the negative you will work with
sample_data(ps)$is.neg <- sample_data(ps)$sample_or_control == "negative"
contamdf <- isContaminant(ps, method="prevalence", neg="is.neg")
table(contamdf$contaminant)
# FALSE TRUE
# 7482 1
head(which(contamdf$contaminant))
# [1] 15
contamdf05 <- isContaminant(ps, method="prevalence", neg="is.neg", threshold=0.5)
table(contamdf05$contaminant)
# FALSE TRUE
# 7480 3
```
```{r}
# Make phyloseq object of presence-absence in negative controls and true samples
ps.pa <- transform_sample_counts(ps, function(abund) 1*(abund>0))
ps.pa.neg <- prune_samples(sample_data(ps.pa)$is.neg, ps.pa)
ps.pa.pos <- prune_samples(sample_data(ps.pa)$sample_or_control == "sample", ps.pa)
```
```{r}
# Make data.frame of prevalence in positive and negative samples
df.pa <- data.frame(pa.pos=taxa_sums(ps.pa.pos), pa.neg=taxa_sums(ps.pa.neg),
contaminant=contamdf05$contaminant)
ggplot(data=df.pa, aes(x=pa.neg, y=pa.pos, color=contaminant)) + geom_point() +
xlab("Prevalence (Negative Controls)") + ylab("Prevalence (True Samples)")
ggsave("figures/prevalence_controls.tiff", height = 5, width = 10)
```
```{r}
ps.noncontam <- prune_taxa(!contamdf05$contaminant, ps)
ps.noncontam
# phyloseq-class experiment-level object
# otu_table() OTU Table: [ 7480 taxa and 137 samples ]
# sample_data() Sample Data: [ 137 samples by 12 sample variables ]
# tax_table() Taxonomy Table: [ 7480 taxa by 7 taxonomic ranks ]
# removing Methylobacterium-Methylorubrum
filterPhyla <- "Methylobacterium-Methylorubrum"
ps.noncontam <- subset_taxa(ps.noncontam, !Genus %in% filterPhyla)
ps.noncontam
# phyloseq-class experiment-level object
# otu_table() OTU Table: [ 7458 taxa and 137 samples ]
# sample_data() Sample Data: [ 137 samples by 10 sample variables ]
# tax_table() Taxonomy Table: [ 7458 taxa by 7 taxonomic ranks ]
saveRDS(ps.noncontam, "phyobjects/ps.noncontam.rds")
ps
# phyloseq-class experiment-level object
# otu_table() OTU Table: [ 7483 taxa and 137 samples ]
# sample_data() Sample Data: [ 137 samples by 12 sample variables ]
# tax_table() Taxonomy Table: [ 7483 taxa by 7 taxonomic ranks ]
```
```{r}
# to check for contaminants names that was removed (in addition to Methylobacterium-Methylorubrum which was removed manually)
is.contam <- contamdf05$contaminant %in% TRUE
taxa_names(ps)[is.contam]
tax_table(ps)[is.contam,]
# at the end of the sequence line > there is the name of the taxa
# Kingdom
# TAGGGAATATTGGGCAATGGAGGCAACTCTGACCCAGCCATGCCGCGTGCAGGAAGAAGGCGTTATGCGTTGTAAACTGCTTTTATATAGGAAGAAATAGTCCTTGCGAGGAAAGTTGACGGTACTATATGAATAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTGTCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTATTAAGTCAGTGGTGAAAGACGGTCGCTCAACGATTGCAGTGCCATTGATACTGGTAGACTTGAGTGGGATTGAGGTAGCTGGAATGGATAGTGTAGCGGTGAAATGCATAGATATTATCCAGAACACCAATTGCGTAGGCAAGTTACTAAGTCTCAACTGACGCTGAGGCACGAAAGTGTGGGTATCAAACA "Bacteria"
# TGAGGAATATTGGTCAATGGGCGAGAGCCTGAACCAGCCAAGTCGCGTGAAGGAAGACTGTCCTAAGGATTGTAAACTTCTTTTATACGGGAATAACGGGCGATACGAGTATTGCATTGAATGTACCGTAAGAATAAGCATCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGTTGTTCGGTAAGTCAGCGGTGAAACCTGAGCGCTCAACGTTCAGCCTGCCGTTGAAACTGCCGGGCTTGAGTTCAGCGGCGGCAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATATCACGAGGAACTCCGATTGCGAAGGCAGCTTGCCATACTGCGACTGACACTGAAGCACGAAGGCGTGGGTATCAAACA "Bacteria"
# TCGAGAATCTTCCGCAATGCACGAAAGTGTGACGGAGCGACGCCGCGTGATGGATGAAGCCCCTTGGGGTGTAAACATCTTTTATGAATGAAGAAATTTTTGACACTAGTTCATGAATAAGAGGCTCCTAACTCTGTGCCAGCAGGAGCGGTAATACAGAGGCCTCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGTGGTTATGTTAGTCGAATTTCAAAGACCCGGGCTCAACCTGGGGAAGGGATTCGAAACGGCATGACTAGAATTAGTGAGAGGTGTGCAGAACTCATGGAGTAGGGGTGAAATCCGTTGATCTCATGGGGAATACCAAATGCGAAGGCAGCACACTGGCGCTATATTGACACTGAGGCACGAAAGCGTGGGTAGCGAATG "Bacteria"
# Phylum
# TAGGGAATATTGGGCAATGGAGGCAACTCTGACCCAGCCATGCCGCGTGCAGGAAGAAGGCGTTATGCGTTGTAAACTGCTTTTATATAGGAAGAAATAGTCCTTGCGAGGAAAGTTGACGGTACTATATGAATAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTGTCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTATTAAGTCAGTGGTGAAAGACGGTCGCTCAACGATTGCAGTGCCATTGATACTGGTAGACTTGAGTGGGATTGAGGTAGCTGGAATGGATAGTGTAGCGGTGAAATGCATAGATATTATCCAGAACACCAATTGCGTAGGCAAGTTACTAAGTCTCAACTGACGCTGAGGCACGAAAGTGTGGGTATCAAACA "Bacteroidota"
# TGAGGAATATTGGTCAATGGGCGAGAGCCTGAACCAGCCAAGTCGCGTGAAGGAAGACTGTCCTAAGGATTGTAAACTTCTTTTATACGGGAATAACGGGCGATACGAGTATTGCATTGAATGTACCGTAAGAATAAGCATCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGTTGTTCGGTAAGTCAGCGGTGAAACCTGAGCGCTCAACGTTCAGCCTGCCGTTGAAACTGCCGGGCTTGAGTTCAGCGGCGGCAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATATCACGAGGAACTCCGATTGCGAAGGCAGCTTGCCATACTGCGACTGACACTGAAGCACGAAGGCGTGGGTATCAAACA "Bacteroidota"
# TCGAGAATCTTCCGCAATGCACGAAAGTGTGACGGAGCGACGCCGCGTGATGGATGAAGCCCCTTGGGGTGTAAACATCTTTTATGAATGAAGAAATTTTTGACACTAGTTCATGAATAAGAGGCTCCTAACTCTGTGCCAGCAGGAGCGGTAATACAGAGGCCTCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGTGGTTATGTTAGTCGAATTTCAAAGACCCGGGCTCAACCTGGGGAAGGGATTCGAAACGGCATGACTAGAATTAGTGAGAGGTGTGCAGAACTCATGGAGTAGGGGTGAAATCCGTTGATCTCATGGGGAATACCAAATGCGAAGGCAGCACACTGGCGCTATATTGACACTGAGGCACGAAAGCGTGGGTAGCGAATG "Patescibacteria"
# Class
# TAGGGAATATTGGGCAATGGAGGCAACTCTGACCCAGCCATGCCGCGTGCAGGAAGAAGGCGTTATGCGTTGTAAACTGCTTTTATATAGGAAGAAATAGTCCTTGCGAGGAAAGTTGACGGTACTATATGAATAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTGTCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTATTAAGTCAGTGGTGAAAGACGGTCGCTCAACGATTGCAGTGCCATTGATACTGGTAGACTTGAGTGGGATTGAGGTAGCTGGAATGGATAGTGTAGCGGTGAAATGCATAGATATTATCCAGAACACCAATTGCGTAGGCAAGTTACTAAGTCTCAACTGACGCTGAGGCACGAAAGTGTGGGTATCAAACA "Bacteroidia"
# TGAGGAATATTGGTCAATGGGCGAGAGCCTGAACCAGCCAAGTCGCGTGAAGGAAGACTGTCCTAAGGATTGTAAACTTCTTTTATACGGGAATAACGGGCGATACGAGTATTGCATTGAATGTACCGTAAGAATAAGCATCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGTTGTTCGGTAAGTCAGCGGTGAAACCTGAGCGCTCAACGTTCAGCCTGCCGTTGAAACTGCCGGGCTTGAGTTCAGCGGCGGCAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATATCACGAGGAACTCCGATTGCGAAGGCAGCTTGCCATACTGCGACTGACACTGAAGCACGAAGGCGTGGGTATCAAACA "Bacteroidia"
# TCGAGAATCTTCCGCAATGCACGAAAGTGTGACGGAGCGACGCCGCGTGATGGATGAAGCCCCTTGGGGTGTAAACATCTTTTATGAATGAAGAAATTTTTGACACTAGTTCATGAATAAGAGGCTCCTAACTCTGTGCCAGCAGGAGCGGTAATACAGAGGCCTCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGTGGTTATGTTAGTCGAATTTCAAAGACCCGGGCTCAACCTGGGGAAGGGATTCGAAACGGCATGACTAGAATTAGTGAGAGGTGTGCAGAACTCATGGAGTAGGGGTGAAATCCGTTGATCTCATGGGGAATACCAAATGCGAAGGCAGCACACTGGCGCTATATTGACACTGAGGCACGAAAGCGTGGGTAGCGAATG "Parcubacteria"
# Order
# TAGGGAATATTGGGCAATGGAGGCAACTCTGACCCAGCCATGCCGCGTGCAGGAAGAAGGCGTTATGCGTTGTAAACTGCTTTTATATAGGAAGAAATAGTCCTTGCGAGGAAAGTTGACGGTACTATATGAATAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTGTCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTATTAAGTCAGTGGTGAAAGACGGTCGCTCAACGATTGCAGTGCCATTGATACTGGTAGACTTGAGTGGGATTGAGGTAGCTGGAATGGATAGTGTAGCGGTGAAATGCATAGATATTATCCAGAACACCAATTGCGTAGGCAAGTTACTAAGTCTCAACTGACGCTGAGGCACGAAAGTGTGGGTATCAAACA "Cytophagales"
# TGAGGAATATTGGTCAATGGGCGAGAGCCTGAACCAGCCAAGTCGCGTGAAGGAAGACTGTCCTAAGGATTGTAAACTTCTTTTATACGGGAATAACGGGCGATACGAGTATTGCATTGAATGTACCGTAAGAATAAGCATCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGTTGTTCGGTAAGTCAGCGGTGAAACCTGAGCGCTCAACGTTCAGCCTGCCGTTGAAACTGCCGGGCTTGAGTTCAGCGGCGGCAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATATCACGAGGAACTCCGATTGCGAAGGCAGCTTGCCATACTGCGACTGACACTGAAGCACGAAGGCGTGGGTATCAAACA "Bacteroidales"
# TCGAGAATCTTCCGCAATGCACGAAAGTGTGACGGAGCGACGCCGCGTGATGGATGAAGCCCCTTGGGGTGTAAACATCTTTTATGAATGAAGAAATTTTTGACACTAGTTCATGAATAAGAGGCTCCTAACTCTGTGCCAGCAGGAGCGGTAATACAGAGGCCTCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGTGGTTATGTTAGTCGAATTTCAAAGACCCGGGCTCAACCTGGGGAAGGGATTCGAAACGGCATGACTAGAATTAGTGAGAGGTGTGCAGAACTCATGGAGTAGGGGTGAAATCCGTTGATCTCATGGGGAATACCAAATGCGAAGGCAGCACACTGGCGCTATATTGACACTGAGGCACGAAAGCGTGGGTAGCGAATG "Candidatus Nomurabacteria"
# Family
# TAGGGAATATTGGGCAATGGAGGCAACTCTGACCCAGCCATGCCGCGTGCAGGAAGAAGGCGTTATGCGTTGTAAACTGCTTTTATATAGGAAGAAATAGTCCTTGCGAGGAAAGTTGACGGTACTATATGAATAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTGTCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTATTAAGTCAGTGGTGAAAGACGGTCGCTCAACGATTGCAGTGCCATTGATACTGGTAGACTTGAGTGGGATTGAGGTAGCTGGAATGGATAGTGTAGCGGTGAAATGCATAGATATTATCCAGAACACCAATTGCGTAGGCAAGTTACTAAGTCTCAACTGACGCTGAGGCACGAAAGTGTGGGTATCAAACA "Spirosomaceae"
# TGAGGAATATTGGTCAATGGGCGAGAGCCTGAACCAGCCAAGTCGCGTGAAGGAAGACTGTCCTAAGGATTGTAAACTTCTTTTATACGGGAATAACGGGCGATACGAGTATTGCATTGAATGTACCGTAAGAATAAGCATCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGTTGTTCGGTAAGTCAGCGGTGAAACCTGAGCGCTCAACGTTCAGCCTGCCGTTGAAACTGCCGGGCTTGAGTTCAGCGGCGGCAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATATCACGAGGAACTCCGATTGCGAAGGCAGCTTGCCATACTGCGACTGACACTGAAGCACGAAGGCGTGGGTATCAAACA "Porphyromonadaceae"
# TCGAGAATCTTCCGCAATGCACGAAAGTGTGACGGAGCGACGCCGCGTGATGGATGAAGCCCCTTGGGGTGTAAACATCTTTTATGAATGAAGAAATTTTTGACACTAGTTCATGAATAAGAGGCTCCTAACTCTGTGCCAGCAGGAGCGGTAATACAGAGGCCTCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGTGGTTATGTTAGTCGAATTTCAAAGACCCGGGCTCAACCTGGGGAAGGGATTCGAAACGGCATGACTAGAATTAGTGAGAGGTGTGCAGAACTCATGGAGTAGGGGTGAAATCCGTTGATCTCATGGGGAATACCAAATGCGAAGGCAGCACACTGGCGCTATATTGACACTGAGGCACGAAAGCGTGGGTAGCGAATG NA
# Genus
# TAGGGAATATTGGGCAATGGAGGCAACTCTGACCCAGCCATGCCGCGTGCAGGAAGAAGGCGTTATGCGTTGTAAACTGCTTTTATATAGGAAGAAATAGTCCTTGCGAGGAAAGTTGACGGTACTATATGAATAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTGTCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTATTAAGTCAGTGGTGAAAGACGGTCGCTCAACGATTGCAGTGCCATTGATACTGGTAGACTTGAGTGGGATTGAGGTAGCTGGAATGGATAGTGTAGCGGTGAAATGCATAGATATTATCCAGAACACCAATTGCGTAGGCAAGTTACTAAGTCTCAACTGACGCTGAGGCACGAAAGTGTGGGTATCAAACA "Arcicella"
# TGAGGAATATTGGTCAATGGGCGAGAGCCTGAACCAGCCAAGTCGCGTGAAGGAAGACTGTCCTAAGGATTGTAAACTTCTTTTATACGGGAATAACGGGCGATACGAGTATTGCATTGAATGTACCGTAAGAATAAGCATCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGTTGTTCGGTAAGTCAGCGGTGAAACCTGAGCGCTCAACGTTCAGCCTGCCGTTGAAACTGCCGGGCTTGAGTTCAGCGGCGGCAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATATCACGAGGAACTCCGATTGCGAAGGCAGCTTGCCATACTGCGACTGACACTGAAGCACGAAGGCGTGGGTATCAAACA "Porphyromonas"
# TCGAGAATCTTCCGCAATGCACGAAAGTGTGACGGAGCGACGCCGCGTGATGGATGAAGCCCCTTGGGGTGTAAACATCTTTTATGAATGAAGAAATTTTTGACACTAGTTCATGAATAAGAGGCTCCTAACTCTGTGCCAGCAGGAGCGGTAATACAGAGGCCTCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGTGGTTATGTTAGTCGAATTTCAAAGACCCGGGCTCAACCTGGGGAAGGGATTCGAAACGGCATGACTAGAATTAGTGAGAGGTGTGCAGAACTCATGGAGTAGGGGTGAAATCCGTTGATCTCATGGGGAATACCAAATGCGAAGGCAGCACACTGGCGCTATATTGACACTGAGGCACGAAAGCGTGGGTAGCGAATG NA
# Species
# TAGGGAATATTGGGCAATGGAGGCAACTCTGACCCAGCCATGCCGCGTGCAGGAAGAAGGCGTTATGCGTTGTAAACTGCTTTTATATAGGAAGAAATAGTCCTTGCGAGGAAAGTTGACGGTACTATATGAATAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTGTCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGTTTATTAAGTCAGTGGTGAAAGACGGTCGCTCAACGATTGCAGTGCCATTGATACTGGTAGACTTGAGTGGGATTGAGGTAGCTGGAATGGATAGTGTAGCGGTGAAATGCATAGATATTATCCAGAACACCAATTGCGTAGGCAAGTTACTAAGTCTCAACTGACGCTGAGGCACGAAAGTGTGGGTATCAAACA NA
# TGAGGAATATTGGTCAATGGGCGAGAGCCTGAACCAGCCAAGTCGCGTGAAGGAAGACTGTCCTAAGGATTGTAAACTTCTTTTATACGGGAATAACGGGCGATACGAGTATTGCATTGAATGTACCGTAAGAATAAGCATCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGTTGTTCGGTAAGTCAGCGGTGAAACCTGAGCGCTCAACGTTCAGCCTGCCGTTGAAACTGCCGGGCTTGAGTTCAGCGGCGGCAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATATCACGAGGAACTCCGATTGCGAAGGCAGCTTGCCATACTGCGACTGACACTGAAGCACGAAGGCGTGGGTATCAAACA "gingivalis"
# TCGAGAATCTTCCGCAATGCACGAAAGTGTGACGGAGCGACGCCGCGTGATGGATGAAGCCCCTTGGGGTGTAAACATCTTTTATGAATGAAGAAATTTTTGACACTAGTTCATGAATAAGAGGCTCCTAACTCTGTGCCAGCAGGAGCGGTAATACAGAGGCCTCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGTGCGTAGGTGGTTATGTTAGTCGAATTTCAAAGACCCGGGCTCAACCTGGGGAAGGGATTCGAAACGGCATGACTAGAATTAGTGAGAGGTGTGCAGAACTCATGGAGTAGGGGTGAAATCCGTTGATCTCATGGGGAATACCAAATGCGAAGGCAGCACACTGGCGCTATATTGACACTGAGGCACGAAAGCGTGGGTAGCGAATG NA
```
```{r}
sessionInfo()
```