Skip to content

Commit

Permalink
fix: solved bug in DA when missing contrast level in modelled feature
Browse files Browse the repository at this point in the history
  • Loading branch information
cvanderaa committed Jul 29, 2024
1 parent 32022fb commit e718229
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 32 deletions.
57 changes: 32 additions & 25 deletions R/ScpModel-DifferentialAnalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -249,32 +249,37 @@ scpDifferentialAnalysis <- function(object,

## Internal function that takes a contrast and retrieves the estimated
## log fold change and associated standard error between the two
## defined groups. The inference is performed on the model coefficients
## and the variance covariance matrix that are automatically retrieved
## from the SummarizedExperimeent object. The log fold change and the
## standard error are returned for all features in a list.
## defined groups. The inference is performed on the model
## coefficients and the variance covariance matrix that are
## automatically retrieved from the SummarizedExperimeent object. The
## log fold change and the standard error are returned for all
## features in a list.
##
## Note that the function may return NA estimates for features where
## missing data leads to loss of one or two levels of interest
## supplied by `contrast`.
##
## @param object An object that inherits from the
## `SummarizedExperiment` class. It must contain an estimated
## `ScpModel` in its metadata.
## @param contrast A `character(3)` with the following elements: 1. The
## name of a categorical variable to test; 2. The name of the
## reference group: 3. The name of the second group to contrast
## against the reference group. `coefficients` and `contrasts`
## cannot be both NULL.
## `SummarizedExperiment` class. It must contain an estimated
## `ScpModel` in its metadata.
## @param contrast A `character(3)` with the following elements: 1.
## The name of a categorical variable to test; 2. The name of the
## reference group: 3. The name of the second group to contrast
## against the reference group. `coefficients` and `contrasts`
## cannot be both NULL.
## @param name A `character(1)` providing the name to use to retrieve
## the model results. When retrieving a model and `name` is
## missing, the name of the first model found in `object` is used.
## the model results. When retrieving a model and `name` is missing,
## the name of the first model found in `object` is used.
##
## cf https://stats.stackexchange.com/a/446699
## cf https://stats.stackexchange.com/a/446699
.contrastToEstimates <- function(object, contrast, name) {
fits <- scpModelFitList(object, name, filtered = TRUE)
out <- sapply(fits, function(fit) {
coef <- scpModelFitCoefficients(fit)
vcov <- scpModelFitVcov(fit)
levs <- scpModelFitLevels(fit)[[contrast[[1]]]]
if (length(levs) <= 1) return(c(logFc = NA, se = NA))
contrastMat <- .levelsToContrastMatrix(contrast, levs)
if (is.null(contrastMat)) return(c(logFc = NA, se = NA))
sel <- grepl(contrast[[1]], names(coef))
logFc <- contrastMat %*% coef[sel]
se <- sqrt(contrastMat %*% vcov[sel, sel] %*% t(contrastMat))
Expand All @@ -289,20 +294,22 @@ scpDifferentialAnalysis <- function(object,
## the quantification of the log fold change between the 2 groups of
## interest.
##
## If one of the levels is absent in contrast (happens when a level
## has been dropped during modelling), the function return NA.
## If `levels` contains only a single level or if one of the levels
## requested in `contrast` is absent (happens when a level has been
## dropped during modelling), the function return NULL since no
## contrast matrix can be computed.
##
## @param contrasts A `character(3)` with the following elements: 1. The
## name of a categorical variable to test; 2. The name of the
## reference group: 3. The name of the second group to contrast
## against the reference group. `coefficients` and `contrasts`
## cannot be both NULL.
## @param contrasts A `character(3)` with the following elements: 1.
## The name of a categorical variable to test; 2. The name of the
## reference group: 3. The name of the second group to contrast
## against the reference group. `coefficients` and `contrasts`
## cannot be both NULL.
## @param levels A `character()` containing all possible levels
## contained by the model variable identified by contrast[[1]].
## contained by the model variable identified by contrast[[1]].
##
.levelsToContrastMatrix <- function(contrast, levels) {
if (any(!contrast[2:3] %in% levels))
return(structure(NA, .Names = contrast[[1]]))
if (length(levels) <= 1 || any(!contrast[2:3] %in% levels))
return(NULL)
df <- data.frame(group = factor(levels, levels = levels))
mm <- model.matrix(
~ 1 + group, data = df,
Expand Down
57 changes: 50 additions & 7 deletions tests/testthat/test-ScpModel-DifferentialAnalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -409,25 +409,68 @@ test_that(".contrastToEstimates", {
),
tolerance = 1E-2
)
## Test when a level of interest is dropped for a 2-level variable
se <- .createMinimalData(nr = 2, nc = 10)
se$condition1 <- as.factor(rep(1:2, length.out = ncol(se)))
se$condition2 <- as.factor(rep(1:2, each = 5))
assay(se)[, se$condition1 == 2] <- 10 * assay(se)[, se$condition1 == 2]
## Drop one level of condition2 for first row, second row is
## positive control.
assay(se)[1, se$condition2 == 2] <- NA
se <- scpModelWorkflow(se, formula = ~ 1 + condition1 + condition2)
expect_equal(
.contrastToEstimates(
se, contrast = c("condition2", "1", "2"), name = "model"
),
list(logFc = c(a = NA, b = 0.0001874609),
se = c(a = NA, b = 0.0005527098)),
tolerance = 1E-6
)
## Test when a level of interest is dropped for a 3-level variable
se <- .createMinimalData(nr = 2, nc = 12)
se$condition1 <- as.factor(rep(1:2, length.out = ncol(se)))
se$condition2 <- as.factor(rep(1:3, each = 4))
assay(se)[, se$condition1 == 2] <- 10 * assay(se)[, se$condition1 == 2]
## Drop one level of condition2 for first row, second row is
## positive control.
assay(se)[1, se$condition2 == 2] <- NA
se <- scpModelWorkflow(se, formula = ~ 1 + condition1 + condition2)
expect_equal(
.contrastToEstimates(
se, contrast = c("condition2", "1", "2"), name = "model"
),
list(logFc = c(a = NA, b = 0),
se = c(a = NA, b = 0.0005126847)),
tolerance = 1E-6
)
## Make sure the contrast for remaining levels is still estimated
expect_equal(
.contrastToEstimates(
se, contrast = c("condition2", "1", "3"), name = "model"
),
list(logFc = c(a = 0, b = 0),
se = c(a = 0.0007943138, b = 0.0005127487 )),
tolerance = 1E-6
)
})

test_that(".levelsToContrastMatrix", {
## 1 level or less = NA
expect_error(
## 1 level or less = NULL
expect_identical(
.levelsToContrastMatrix(contrast = c("condition", "A", "B"),
levels = "A"),
NA
NULL
)
expect_error(
expect_identical(
.levelsToContrastMatrix(contrast = c("condition", "A", "B"),
levels = character()),
NA
NULL
)
## 1 level to test is not in available levels = NA
expect_error(
expect_identical(
.levelsToContrastMatrix(contrast = c("condition", "A", "foo"),
levels = c("A", "B")),
NA
NULL
)
## 2-level variable
expect_identical(
Expand Down

0 comments on commit e718229

Please sign in to comment.