From 384e34e55574489217c75dd0fc47e83b658926d7 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Mon, 5 May 2025 21:37:37 +0000 Subject: [PATCH 01/14] feat: add formula and contrasts to limma --- modules/nf-core/limma/differential/main.nf | 2 +- .../limma/differential/templates/limma_de.R | 26 ++++ .../limma/differential/tests/main.nf.test | 130 +++++++++++++++++- 3 files changed, 154 insertions(+), 4 deletions(-) diff --git a/modules/nf-core/limma/differential/main.nf b/modules/nf-core/limma/differential/main.nf index b7106c397b6..e753ad41ad6 100644 --- a/modules/nf-core/limma/differential/main.nf +++ b/modules/nf-core/limma/differential/main.nf @@ -8,7 +8,7 @@ process LIMMA_DIFFERENTIAL { 'community.wave.seqera.io/library/bioconductor-edger_bioconductor-limma:176c202c82450990' }" input: - tuple val(meta), val(contrast_variable), val(reference), val(target) + tuple val(meta), val(contrast_variable), val(reference), val(target), val(formula), val(comparison) tuple val(meta2), path(samplesheet), path(intensities) output: diff --git a/modules/nf-core/limma/differential/templates/limma_de.R b/modules/nf-core/limma/differential/templates/limma_de.R index 7db3967677e..9885a48797b 100644 --- a/modules/nf-core/limma/differential/templates/limma_de.R +++ b/modules/nf-core/limma/differential/templates/limma_de.R @@ -52,6 +52,17 @@ read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.nam ) } +# +#' Turn “null” or empty strings into actual NULL +#' +#' @param x Input option +#' +#' @return NULL or x +#' +nullify <- function(x) { + if (is.character(x) && (tolower(x) == "null" || x == "")) NULL else x +} + ################################################ ################################################ ## PARSE PARAMETERS FROM NEXTFLOW ## @@ -71,6 +82,8 @@ opt <- list( contrast_variable = '$contrast_variable', reference_level = '$reference', target_level = '$target', + formula = '$formula', + contrast_string = '$comparison', blocking_variables = NULL, probe_id_col = "probe_id", sample_id_col = "experiment_accession", @@ -116,6 +129,10 @@ if ( ! is.null(opt\$round_digits)){ opt\$round_digits <- as.numeric(opt\$round_digits) } +# If there is no option supplied, convert string "null" to NULL +keys <- c("formula", "contrast_string") +opt[keys] <- lapply(opt[keys], nullify) + # Check if required parameters have been provided required_opts <- c('contrast_variable', 'reference_level', 'target_level', 'output_prefix') @@ -282,6 +299,10 @@ if ((! is.null(opt\$exclude_samples_col)) && (! is.null(opt\$exclude_samples_val ################################################ ################################################ +if (!is.null(opt\$formula)) { + model <- opt\$formula +} else { + # Build the model formula with blocking variables first model_vars <- c() @@ -302,6 +323,7 @@ for (v in vars_to_factor) { sample.sheet[[v]] <- as.factor(sample.sheet[[v]]) } +} ################################################ ################################################ ## Run Limma processes ## @@ -393,6 +415,9 @@ fit <- do.call(lmFit, lmfit_args) # Contrasts bit # Create the contrast string for the specified comparison +if (!is.null(opt\$contrast_string)) { + contrast_string <- opt\$contrast_string +} else { # Construct the expected column names for the target and reference levels in the design matrix treatment_target <- paste0(contrast_variable, ".", opt\$target_level) @@ -418,6 +443,7 @@ if ((treatment_target %in% colnames(design)) && (treatment_reference %in% colnam # This indicates an error; the specified levels are not found stop(paste0(treatment_target, " and ", treatment_reference, " not found in design matrix")) } +} # Create the contrast matrix contrast.matrix <- makeContrasts(contrasts=contrast_string, levels=design) diff --git a/modules/nf-core/limma/differential/tests/main.nf.test b/modules/nf-core/limma/differential/tests/main.nf.test index 551a53f12d7..a8667daf371 100644 --- a/modules/nf-core/limma/differential/tests/main.nf.test +++ b/modules/nf-core/limma/differential/tests/main.nf.test @@ -54,7 +54,7 @@ nextflow_process { ) input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia']) .map{ - tuple(it, it.variable, it.reference, it.target) + tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) } input[1] = ch_samplesheet .join(AFFY_JUSTRMA.out.expression) @@ -72,6 +72,130 @@ nextflow_process { ) } + } + + test("test_limma_differential - null formula and null complex contrasts") { + + config "./nextflow.config" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751_RAW.tar", checkIfExists: true) + ] + """ + } + } + run("AFFY_JUSTRMA") { + script "../../../affy/justrma/main.nf" + process { + """ + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) + ] + ) + input[0] = ch_samplesheet.join(UNTAR.out.untar) + input[1] = [[],[]] + """ + } + } + } + + when { + process { + """ + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) + ] + ) + input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia']) + .map{ + tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) + } + input[1] = ch_samplesheet + .join(AFFY_JUSTRMA.out.expression) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.model, process.out.versions).match() }, + { assert path(process.out.session_info[0][1]).getText().contains("limma_3.58.1") }, + { assert path(process.out.results[0][1]).getText().contains("1007_s_at\t-0.2775254") }, + { assert path(process.out.results[0][1]).getText().contains("1053_at\t-0.071547786") } + ) + } + + } + + test("test_limma_differential - with formula and with complex contrasts") { + + config "./nextflow.config" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751_RAW.tar", checkIfExists: true) + ] + """ + } + } + run("AFFY_JUSTRMA") { + script "../../../affy/justrma/main.nf" + process { + """ + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) + ] + ) + input[0] = ch_samplesheet.join(UNTAR.out.untar) + input[1] = [[],[]] + """ + } + } + } + + when { + process { + """ + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) + ] + ) + input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia', 'formula': '~ diagnosis', 'comparison': 'diagnosis.uremia']) + .map{ + tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) + } + input[1] = ch_samplesheet + .join(AFFY_JUSTRMA.out.expression) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.model, process.out.versions).match() }, + { assert path(process.out.session_info[0][1]).getText().contains("limma_3.58.1") }, + { assert path(process.out.results[0][1]).getText().contains("1007_s_at\t-0.27752") }, + { assert path(process.out.results[0][1]).getText().contains("1053_at\t-0.0715477") } + ) + } + } test("test_limma_differential - exclude_samples") { @@ -116,7 +240,7 @@ nextflow_process { ) input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia']) .map{ - tuple(it, it.variable, it.reference, it.target) + tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) } input[1] = ch_samplesheet .join(AFFY_JUSTRMA.out.expression) @@ -179,7 +303,7 @@ nextflow_process { ) input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia']) .map{ - tuple(it, it.variable, it.reference, it.target) + tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) } input[1] = ch_samplesheet .join(AFFY_JUSTRMA.out.expression) From 37f5e2c5e81fed81e0f53a263a5080362df70ed5 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 9 May 2025 15:09:04 +0000 Subject: [PATCH 02/14] test: update snapshots --- .../differential/tests/main.nf.test.snap | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/modules/nf-core/limma/differential/tests/main.nf.test.snap b/modules/nf-core/limma/differential/tests/main.nf.test.snap index 8678f660a9f..02626988cc2 100644 --- a/modules/nf-core/limma/differential/tests/main.nf.test.snap +++ b/modules/nf-core/limma/differential/tests/main.nf.test.snap @@ -91,6 +91,31 @@ }, "timestamp": "2024-10-31T12:34:25.24499" }, + "test_limma_differential - with formula and with complex contrasts": { + "content": [ + [ + [ + { + "id": "diagnosis_normal_uremia", + "variable": "diagnosis", + "reference": "normal", + "target": "uremia", + "formula": "~ diagnosis", + "comparison": "diagnosis.uremia" + }, + "diagnosis_normal_uremia.limma.model.txt:md5,660fe42c7c13e47524344eaf5b7d2e7c" + ] + ], + [ + "versions.yml:md5,88a6e42d753077edab8daf829cd4d943" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-05-09T13:31:02.129502295" + }, "test_limma_differential - stub": { "content": [ [ @@ -138,6 +163,29 @@ }, "timestamp": "2024-10-31T12:36:56.462834" }, + "test_limma_differential - null formula and null complex contrasts": { + "content": [ + [ + [ + { + "id": "diagnosis_normal_uremia", + "variable": "diagnosis", + "reference": "normal", + "target": "uremia" + }, + "diagnosis_normal_uremia.limma.model.txt:md5,70b000f632b8bdba4917046362dd876b" + ] + ], + [ + "versions.yml:md5,88a6e42d753077edab8daf829cd4d943" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.6" + }, + "timestamp": "2025-05-09T13:30:02.217053547" + }, "test_limma_differential - voom_mixed": { "content": [ [ From 4fb55dae7bb6a79919e30140161cc02d774c29c4 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 9 May 2025 15:25:08 +0000 Subject: [PATCH 03/14] docs: update meta.yml --- modules/nf-core/limma/differential/meta.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/nf-core/limma/differential/meta.yml b/modules/nf-core/limma/differential/meta.yml index dbf904305a1..3f652710e01 100644 --- a/modules/nf-core/limma/differential/meta.yml +++ b/modules/nf-core/limma/differential/meta.yml @@ -37,6 +37,12 @@ input: description: | The value within the contrast_variable column of the sample sheet that should be used to derive the target samples + - formula: + type: string + description: (Mandatory) R formula string used for modeling, e.g. '~ treatment + (1 | sample_number)'. + - comparison: + type: string + description: (Optional) Literal string passed to `limma::makeContrasts`, e.g. 'treatmenthND6 - treatmentmCherry'. - - meta2: type: map description: | From ca9960ddf55ccd58b5bc2ce4fac43fa7f111f38e Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 20 May 2025 12:15:06 +0000 Subject: [PATCH 04/14] test: remove unused test and add test with null reference/target --- .../limma/differential/tests/main.nf.test | 64 +------------------ 1 file changed, 1 insertion(+), 63 deletions(-) diff --git a/modules/nf-core/limma/differential/tests/main.nf.test b/modules/nf-core/limma/differential/tests/main.nf.test index a8667daf371..2a0b44f54aa 100644 --- a/modules/nf-core/limma/differential/tests/main.nf.test +++ b/modules/nf-core/limma/differential/tests/main.nf.test @@ -72,68 +72,6 @@ nextflow_process { ) } - } - - test("test_limma_differential - null formula and null complex contrasts") { - - config "./nextflow.config" - - setup { - run("UNTAR") { - script "../../../untar/main.nf" - process { - """ - input[0] = [ - [id: 'test'], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751_RAW.tar", checkIfExists: true) - ] - """ - } - } - run("AFFY_JUSTRMA") { - script "../../../affy/justrma/main.nf" - process { - """ - ch_samplesheet = Channel.of([ - [ id:'test' ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) - ] - ) - input[0] = ch_samplesheet.join(UNTAR.out.untar) - input[1] = [[],[]] - """ - } - } - } - - when { - process { - """ - ch_samplesheet = Channel.of([ - [ id:'test' ], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) - ] - ) - input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia']) - .map{ - tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) - } - input[1] = ch_samplesheet - .join(AFFY_JUSTRMA.out.expression) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.model, process.out.versions).match() }, - { assert path(process.out.session_info[0][1]).getText().contains("limma_3.58.1") }, - { assert path(process.out.results[0][1]).getText().contains("1007_s_at\t-0.2775254") }, - { assert path(process.out.results[0][1]).getText().contains("1053_at\t-0.071547786") } - ) - } - } test("test_limma_differential - with formula and with complex contrasts") { @@ -176,7 +114,7 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/array_expression/GSE38751.csv", checkIfExists: true) ] ) - input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'variable': 'diagnosis', 'reference': 'normal', 'target': 'uremia', 'formula': '~ diagnosis', 'comparison': 'diagnosis.uremia']) + input[0] = Channel.of(['id': 'diagnosis_normal_uremia', 'formula': '~ diagnosis', 'comparison': 'diagnosisuremia']) .map{ tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) } From 97ad3bbb1373c91813cca61488dd161c3a870a4d Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 20 May 2025 12:15:48 +0000 Subject: [PATCH 05/14] chore: indent and debug design --- .../limma/differential/templates/limma_de.R | 201 ++++++++++-------- 1 file changed, 112 insertions(+), 89 deletions(-) diff --git a/modules/nf-core/limma/differential/templates/limma_de.R b/modules/nf-core/limma/differential/templates/limma_de.R index 9885a48797b..6c70665a9a8 100644 --- a/modules/nf-core/limma/differential/templates/limma_de.R +++ b/modules/nf-core/limma/differential/templates/limma_de.R @@ -130,12 +130,12 @@ if ( ! is.null(opt\$round_digits)){ } # If there is no option supplied, convert string "null" to NULL -keys <- c("formula", "contrast_string") +keys <- c("formula", "contrast_string", "contrast_variable", "reference_level", "target_level") opt[keys] <- lapply(opt[keys], nullify) # Check if required parameters have been provided -required_opts <- c('contrast_variable', 'reference_level', 'target_level', 'output_prefix') +required_opts <- c('output_prefix') missing <- required_opts[unlist(lapply(opt[required_opts], is.null)) | ! required_opts %in% names(opt)] if (length(missing) > 0){ @@ -224,48 +224,49 @@ if (length(missing_samples) > 0) { contrast_variable <- make.names(opt\$contrast_variable) blocking.vars <- c() -if (!contrast_variable %in% colnames(sample.sheet)) { - stop( - paste0( - 'Chosen contrast variable \"', - contrast_variable, - '\" not in sample sheet' - ) - ) -} else if (any(!c(opt\$reference_level, opt\$target_level) %in% sample.sheet[[contrast_variable]])) { - stop( - paste( - 'Please choose reference and target levels that are present in the', - contrast_variable, - 'column of the sample sheet' +if (!is.null(opt\$contrast_variable)) { + if (!contrast_variable %in% colnames(sample.sheet)) { + stop( + paste0( + 'Chosen contrast variable \"', + contrast_variable, + '\" not in sample sheet' + ) ) - ) -} else if (!is.null(opt\$blocking_variables)) { - blocking.vars = make.names(unlist(strsplit(opt\$blocking_variables, split = ';'))) - if (!all(blocking.vars %in% colnames(sample.sheet))) { - missing_block <- paste(blocking.vars[! blocking.vars %in% colnames(sample.sheet)], collapse = ',') + } else if (any(!c(opt\$reference_level, opt\$target_level) %in% sample.sheet[[contrast_variable]])) { stop( paste( - 'Blocking variables', missing_block, - 'do not correspond to sample sheet columns.' + 'Please choose reference and target levels that are present in the', + contrast_variable, + 'column of the sample sheet' ) ) + } else if (!is.null(opt\$blocking_variables)) { + blocking.vars = make.names(unlist(strsplit(opt\$blocking_variables, split = ';'))) + if (!all(blocking.vars %in% colnames(sample.sheet))) { + missing_block <- paste(blocking.vars[! blocking.vars %in% colnames(sample.sheet)], collapse = ',') + stop( + paste( + 'Blocking variables', missing_block, + 'do not correspond to sample sheet columns.' + ) + ) + } } -} -# Handle conflicts between blocking variables and block -if (!is.null(opt\$block) && !is.null(opt\$blocking_variables)) { - if (opt\$block %in% blocking.vars) { - warning(paste("Variable", opt\$block, "is specified both as a fixed effect and a random effect. It will be treated as a random effect only.")) - blocking.vars <- setdiff(blocking.vars, opt\$block) - if (length(blocking.vars) == 0) { - opt\$blocking_variables <- NULL - } else { - opt\$blocking_variables <- paste(blocking.vars, collapse = ';') + # Handle conflicts between blocking variables and block + if (!is.null(opt\$block) && !is.null(opt\$blocking_variables)) { + if (opt\$block %in% blocking.vars) { + warning(paste("Variable", opt\$block, "is specified both as a fixed effect and a random effect. It will be treated as a random effect only.")) + blocking.vars <- setdiff(blocking.vars, opt\$block) + if (length(blocking.vars) == 0) { + opt\$blocking_variables <- NULL + } else { + opt\$blocking_variables <- paste(blocking.vars, collapse = ';') + } } } } - # Optionally, subset to only the samples involved in the contrast if (opt\$subset_to_contrast_samples){ @@ -303,25 +304,25 @@ if (!is.null(opt\$formula)) { model <- opt\$formula } else { -# Build the model formula with blocking variables first -model_vars <- c() + # Build the model formula with blocking variables first + model_vars <- c() -if (!is.null(opt\$blocking_variables)) { - # Include blocking variables (including pairing variables if any) - model_vars <- c(model_vars, blocking.vars) -} + if (!is.null(opt\$blocking_variables)) { + # Include blocking variables (including pairing variables if any) + model_vars <- c(model_vars, blocking.vars) + } -# Add the contrast variable at the end -model_vars <- c(model_vars, contrast_variable) + # Add the contrast variable at the end + model_vars <- c(model_vars, contrast_variable) -# Construct the model formula -model <- paste('~ 0 +', paste(model_vars, collapse = '+')) + # Construct the model formula + model <- paste('~ 0 +', paste(model_vars, collapse = '+')) -# Make sure all the appropriate variables are factors -vars_to_factor <- model_vars # All variables in the model need to be factors -for (v in vars_to_factor) { - sample.sheet[[v]] <- as.factor(sample.sheet[[v]]) -} + # Make sure all the appropriate variables are factors + vars_to_factor <- model_vars # All variables in the model need to be factors + for (v in vars_to_factor) { + sample.sheet[[v]] <- as.factor(sample.sheet[[v]]) + } } ################################################ @@ -329,23 +330,34 @@ for (v in vars_to_factor) { ## Run Limma processes ## ################################################ ################################################ +if (!is.null(opt\$formula)) { + model_formula <- as.formula(opt\$formula) + cat("Using user formula object:\n") + print(model_formula) + design <- model.matrix(model_formula, data = sample.sheet) + print(colnames(design)) + cat("Internal column names after make.names():\n") + colnames(design) <- make.names(colnames(design)) -# Generate the design matrix -design <- model.matrix( - as.formula(model), - data=sample.sheet -) - -# Adjust column names for the contrast variable -colnames(design) <- sub( - paste0('^', contrast_variable), - paste0(contrast_variable, '.'), - colnames(design) -) + } else { + # Generate the design matrix + design <- model.matrix( + as.formula(model), + data=sample.sheet + ) -# Adjust column names to be syntactically valid -colnames(design) <- make.names(colnames(design)) + # Adjust column names for the contrast variable + colnames(design) <- sub( + paste0('^', contrast_variable), + paste0(contrast_variable, '.'), + colnames(design) + ) + # Adjust column names to be syntactically valid + colnames(design) <- make.names(colnames(design)) + cat("Final column names after make.names():\n") + print(colnames(design)) + } # Perform voom normalisation for RNA-seq data if (!is.null(opt\$use_voom) && opt\$use_voom) { @@ -416,37 +428,48 @@ fit <- do.call(lmFit, lmfit_args) # Create the contrast string for the specified comparison if (!is.null(opt\$contrast_string)) { - contrast_string <- opt\$contrast_string -} else { + cat("Using contrast string:", opt\$contrast_string, "\n") + contrast_string <- as.character(opt\$contrast_string) + + # Display design matrix + cat("Design matrix:") + + head(design, 3) + print(colnames(design)) + + contrast.matrix <- makeContrasts(contrasts=contrast_string, levels=colnames(design)) -# Construct the expected column names for the target and reference levels in the design matrix -treatment_target <- paste0(contrast_variable, ".", opt\$target_level) -treatment_reference <- paste0(contrast_variable, ".", opt\$reference_level) - -# Determine how to construct the contrast string based on which levels are present in the design matrix -if ((treatment_target %in% colnames(design)) && (treatment_reference %in% colnames(design))) { - # Both target and reference levels are present in the design matrix - # We can directly compare the two levels - contrast_string <- paste0(treatment_target, "-", treatment_reference) -} else if (treatment_target %in% colnames(design)) { - # Only the target level is present in the design matrix - # The reference level may have been omitted due to collinearity or being set as the baseline - # We compare the target level to zero (implicit reference) - contrast_string <- paste0(treatment_target, "- 0") -} else if (treatment_reference %in% colnames(design)) { - # Only the reference level is present in the design matrix - # The target level may have been omitted from the design matrix - # We compare zero (implicit target) to the reference level - contrast_string <- paste0("0 - ", treatment_reference) } else { - # Neither level is present in the design matrix - # This indicates an error; the specified levels are not found - stop(paste0(treatment_target, " and ", treatment_reference, " not found in design matrix")) -} + + # Construct the expected column names for the target and reference levels in the design matrix + treatment_target <- paste0(contrast_variable, ".", opt\$target_level) + treatment_reference <- paste0(contrast_variable, ".", opt\$reference_level) + + # Determine how to construct the contrast string based on which levels are present in the design matrix + if ((treatment_target %in% colnames(design)) && (treatment_reference %in% colnames(design))) { + # Both target and reference levels are present in the design matrix + # We can directly compare the two levels + contrast_string <- paste0(treatment_target, "-", treatment_reference) + } else if (treatment_target %in% colnames(design)) { + # Only the target level is present in the design matrix + # The reference level may have been omitted due to collinearity or being set as the baseline + # We compare the target level to zero (implicit reference) + contrast_string <- paste0(treatment_target, "- 0") + } else if (treatment_reference %in% colnames(design)) { + # Only the reference level is present in the design matrix + # The target level may have been omitted from the design matrix + # We compare zero (implicit target) to the reference level + contrast_string <- paste0("0 - ", treatment_reference) + } else { + # Neither level is present in the design matrix + # This indicates an error; the specified levels are not found + stop(paste0(treatment_target, " and ", treatment_reference, " not found in design matrix")) + } + contrast.matrix <- makeContrasts(contrasts=contrast_string, levels=design) + } # Create the contrast matrix -contrast.matrix <- makeContrasts(contrasts=contrast_string, levels=design) fit2 <- contrasts.fit(fit, contrast.matrix) # Prepare for and run eBayes From 8231bc1bcd47106f9d54f011ba6acf1cba03b916 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 20 May 2025 12:48:14 +0000 Subject: [PATCH 06/14] test: update test and snapshot --- .../limma/differential/tests/main.nf.test | 32 +++++++++++++++++++ .../differential/tests/main.nf.test.snap | 31 ++++++++++++++---- .../tests/nextflow.interaction.config | 13 ++++++++ 3 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 modules/nf-core/limma/differential/tests/nextflow.interaction.config diff --git a/modules/nf-core/limma/differential/tests/main.nf.test b/modules/nf-core/limma/differential/tests/main.nf.test index 2a0b44f54aa..ce58185c32d 100644 --- a/modules/nf-core/limma/differential/tests/main.nf.test +++ b/modules/nf-core/limma/differential/tests/main.nf.test @@ -12,6 +12,38 @@ nextflow_process { tag "affy/justrma" tag "untar" + test("RNAseq - Feature Counts - formula + comparison contrast string - interaction") { + config "./nextflow.interaction.config" + + when { + process { + """ + input[0] = Channel.of([ + 'id': 'genotype_WT_KO_treatment_Control_Treated', + 'formula': '~ genotype * treatment', + 'comparison': 'genotypeWT.treatmentTreated' // should be a 'make.names() string' + ]) + .map { + tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) + } + + input[1] = Channel.of([ + [ id: 'test' ], + file("https://github.com/nf-core/test-datasets/raw/differentialabundance/modules_testdata/variancepartition_dream/metadata.tsv", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/differentialabundance/modules_testdata/variancepartition_dream/counts.tsv", checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.model, process.out.versions).match() } + ) + } + } + test("test_limma_differential") { config "./nextflow.config" diff --git a/modules/nf-core/limma/differential/tests/main.nf.test.snap b/modules/nf-core/limma/differential/tests/main.nf.test.snap index 02626988cc2..c9586508c16 100644 --- a/modules/nf-core/limma/differential/tests/main.nf.test.snap +++ b/modules/nf-core/limma/differential/tests/main.nf.test.snap @@ -1,4 +1,26 @@ { + "RNAseq - Feature Counts - formula + comparison contrast string - interaction": { + "content": [ + [ + [ + { + "id": "genotype_WT_KO_treatment_Control_Treated", + "formula": "~ genotype * treatment", + "comparison": "genotypeWT.treatmentTreated" + }, + "genotype_WT_KO_treatment_Control_Treated.limma.model.txt:md5,82165b8b24da8f8cffafa457290feeb3" + ] + ], + [ + "versions.yml:md5,88a6e42d753077edab8daf829cd4d943" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.1" + }, + "timestamp": "2025-05-20T12:41:52.606512166" + }, "test_limma_differential - voom_blocking": { "content": [ [ @@ -97,11 +119,8 @@ [ { "id": "diagnosis_normal_uremia", - "variable": "diagnosis", - "reference": "normal", - "target": "uremia", "formula": "~ diagnosis", - "comparison": "diagnosis.uremia" + "comparison": "diagnosisuremia" }, "diagnosis_normal_uremia.limma.model.txt:md5,660fe42c7c13e47524344eaf5b7d2e7c" ] @@ -112,9 +131,9 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.6" + "nextflow": "25.04.1" }, - "timestamp": "2025-05-09T13:31:02.129502295" + "timestamp": "2025-05-20T12:44:07.77296995" }, "test_limma_differential - stub": { "content": [ diff --git a/modules/nf-core/limma/differential/tests/nextflow.interaction.config b/modules/nf-core/limma/differential/tests/nextflow.interaction.config new file mode 100644 index 00000000000..3fc574a7d1a --- /dev/null +++ b/modules/nf-core/limma/differential/tests/nextflow.interaction.config @@ -0,0 +1,13 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: 'LIMMA_DIFFERENTIAL' { + ext.args = { [ + "--sample_id_col sample", + "--probe_id_col gene_id", + "--blocking_variables $meta.blocking" + ].join(' ').trim() } + } + +} From 9d116d6ed6ff7f9fc2b986218243b3e0366b3822 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 21 May 2025 08:53:44 -0300 Subject: [PATCH 07/14] Update limma_de.R Co-authored-by: Gregor Sturm --- modules/nf-core/limma/differential/templates/limma_de.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/limma/differential/templates/limma_de.R b/modules/nf-core/limma/differential/templates/limma_de.R index 6c70665a9a8..77d91af304d 100644 --- a/modules/nf-core/limma/differential/templates/limma_de.R +++ b/modules/nf-core/limma/differential/templates/limma_de.R @@ -339,7 +339,7 @@ if (!is.null(opt\$formula)) { cat("Internal column names after make.names():\n") colnames(design) <- make.names(colnames(design)) - } else { +} else { # Generate the design matrix design <- model.matrix( as.formula(model), From 3692f47f0284694cbbdd47ada458a49a59dad66c Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 21 May 2025 17:24:09 +0000 Subject: [PATCH 08/14] style: remove redundant sections --- .../nf-core/limma/differential/templates/limma_de.R | 11 +---------- .../differential/tests/nextflow.interaction.config | 3 +-- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/modules/nf-core/limma/differential/templates/limma_de.R b/modules/nf-core/limma/differential/templates/limma_de.R index 77d91af304d..2d9bbe4258f 100644 --- a/modules/nf-core/limma/differential/templates/limma_de.R +++ b/modules/nf-core/limma/differential/templates/limma_de.R @@ -300,9 +300,7 @@ if ((! is.null(opt\$exclude_samples_col)) && (! is.null(opt\$exclude_samples_val ################################################ ################################################ -if (!is.null(opt\$formula)) { - model <- opt\$formula -} else { +if (is.null(opt\$formula)) { # Build the model formula with blocking variables first model_vars <- c() @@ -430,13 +428,6 @@ fit <- do.call(lmFit, lmfit_args) if (!is.null(opt\$contrast_string)) { cat("Using contrast string:", opt\$contrast_string, "\n") contrast_string <- as.character(opt\$contrast_string) - - # Display design matrix - cat("Design matrix:") - - head(design, 3) - print(colnames(design)) - contrast.matrix <- makeContrasts(contrasts=contrast_string, levels=colnames(design)) } else { diff --git a/modules/nf-core/limma/differential/tests/nextflow.interaction.config b/modules/nf-core/limma/differential/tests/nextflow.interaction.config index 3fc574a7d1a..f73bf1d62f8 100644 --- a/modules/nf-core/limma/differential/tests/nextflow.interaction.config +++ b/modules/nf-core/limma/differential/tests/nextflow.interaction.config @@ -5,8 +5,7 @@ process { withName: 'LIMMA_DIFFERENTIAL' { ext.args = { [ "--sample_id_col sample", - "--probe_id_col gene_id", - "--blocking_variables $meta.blocking" + "--probe_id_col gene_id" ].join(' ').trim() } } From 9cf968a0d1b20a46f13a4aa9bed5210f355eac9a Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 21 May 2025 17:42:51 +0000 Subject: [PATCH 09/14] Revert "style: remove redundant sections" This reverts commit 3692f47f0284694cbbdd47ada458a49a59dad66c. --- .../nf-core/limma/differential/templates/limma_de.R | 11 ++++++++++- .../differential/tests/nextflow.interaction.config | 3 ++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/limma/differential/templates/limma_de.R b/modules/nf-core/limma/differential/templates/limma_de.R index 2d9bbe4258f..77d91af304d 100644 --- a/modules/nf-core/limma/differential/templates/limma_de.R +++ b/modules/nf-core/limma/differential/templates/limma_de.R @@ -300,7 +300,9 @@ if ((! is.null(opt\$exclude_samples_col)) && (! is.null(opt\$exclude_samples_val ################################################ ################################################ -if (is.null(opt\$formula)) { +if (!is.null(opt\$formula)) { + model <- opt\$formula +} else { # Build the model formula with blocking variables first model_vars <- c() @@ -428,6 +430,13 @@ fit <- do.call(lmFit, lmfit_args) if (!is.null(opt\$contrast_string)) { cat("Using contrast string:", opt\$contrast_string, "\n") contrast_string <- as.character(opt\$contrast_string) + + # Display design matrix + cat("Design matrix:") + + head(design, 3) + print(colnames(design)) + contrast.matrix <- makeContrasts(contrasts=contrast_string, levels=colnames(design)) } else { diff --git a/modules/nf-core/limma/differential/tests/nextflow.interaction.config b/modules/nf-core/limma/differential/tests/nextflow.interaction.config index f73bf1d62f8..3fc574a7d1a 100644 --- a/modules/nf-core/limma/differential/tests/nextflow.interaction.config +++ b/modules/nf-core/limma/differential/tests/nextflow.interaction.config @@ -5,7 +5,8 @@ process { withName: 'LIMMA_DIFFERENTIAL' { ext.args = { [ "--sample_id_col sample", - "--probe_id_col gene_id" + "--probe_id_col gene_id", + "--blocking_variables $meta.blocking" ].join(' ').trim() } } From 8ade42c7309e94e40c55296b3d27ee9c9a7bf7c7 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Thu, 22 May 2025 13:04:32 +0000 Subject: [PATCH 10/14] chore: remove unused argument --- .../limma/differential/tests/nextflow.interaction.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/nf-core/limma/differential/tests/nextflow.interaction.config b/modules/nf-core/limma/differential/tests/nextflow.interaction.config index 3fc574a7d1a..f73bf1d62f8 100644 --- a/modules/nf-core/limma/differential/tests/nextflow.interaction.config +++ b/modules/nf-core/limma/differential/tests/nextflow.interaction.config @@ -5,8 +5,7 @@ process { withName: 'LIMMA_DIFFERENTIAL' { ext.args = { [ "--sample_id_col sample", - "--probe_id_col gene_id", - "--blocking_variables $meta.blocking" + "--probe_id_col gene_id" ].join(' ').trim() } } From 85797dff542577b42e3445b362595c06ef1cf8cf Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Thu, 22 May 2025 13:07:42 +0000 Subject: [PATCH 11/14] chore: remove debug prints --- modules/nf-core/limma/differential/templates/limma_de.R | 7 ------- 1 file changed, 7 deletions(-) diff --git a/modules/nf-core/limma/differential/templates/limma_de.R b/modules/nf-core/limma/differential/templates/limma_de.R index 77d91af304d..70f90a257cb 100644 --- a/modules/nf-core/limma/differential/templates/limma_de.R +++ b/modules/nf-core/limma/differential/templates/limma_de.R @@ -430,13 +430,6 @@ fit <- do.call(lmFit, lmfit_args) if (!is.null(opt\$contrast_string)) { cat("Using contrast string:", opt\$contrast_string, "\n") contrast_string <- as.character(opt\$contrast_string) - - # Display design matrix - cat("Design matrix:") - - head(design, 3) - print(colnames(design)) - contrast.matrix <- makeContrasts(contrasts=contrast_string, levels=colnames(design)) } else { From b88bc1acc04c67aef006601cb7c3810ac99b10a9 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Thu, 22 May 2025 13:30:37 +0000 Subject: [PATCH 12/14] refact: simplify code blocks --- .../limma/differential/templates/limma_de.R | 27 ++++++------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/modules/nf-core/limma/differential/templates/limma_de.R b/modules/nf-core/limma/differential/templates/limma_de.R index 70f90a257cb..d55436dcc1b 100644 --- a/modules/nf-core/limma/differential/templates/limma_de.R +++ b/modules/nf-core/limma/differential/templates/limma_de.R @@ -296,14 +296,19 @@ if ((! is.null(opt\$exclude_samples_col)) && (! is.null(opt\$exclude_samples_val ################################################ ################################################ -## Build the Model Formula ## +## Build the Model Formula and Run Limma ## ################################################ ################################################ if (!is.null(opt\$formula)) { model <- opt\$formula -} else { + model_formula <- as.formula(model) + cat("Using user-specified formula:\n ", deparse(model_formula), "\n") + design <- model.matrix(model_formula, data = sample.sheet) + colnames(design) <- make.names(colnames(design)) + cat("Column names after make.names():\n ", paste(colnames(design), collapse = ", "), "\n") +} else { # Build the model formula with blocking variables first model_vars <- c() @@ -324,22 +329,6 @@ if (!is.null(opt\$formula)) { sample.sheet[[v]] <- as.factor(sample.sheet[[v]]) } -} -################################################ -################################################ -## Run Limma processes ## -################################################ -################################################ -if (!is.null(opt\$formula)) { - model_formula <- as.formula(opt\$formula) - cat("Using user formula object:\n") - print(model_formula) - design <- model.matrix(model_formula, data = sample.sheet) - print(colnames(design)) - cat("Internal column names after make.names():\n") - colnames(design) <- make.names(colnames(design)) - -} else { # Generate the design matrix design <- model.matrix( as.formula(model), @@ -357,7 +346,7 @@ if (!is.null(opt\$formula)) { colnames(design) <- make.names(colnames(design)) cat("Final column names after make.names():\n") print(colnames(design)) - } +} # Perform voom normalisation for RNA-seq data if (!is.null(opt\$use_voom) && opt\$use_voom) { From 102468c5602bfff4cd50278a0c80064f6895a6ca Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Thu, 22 May 2025 15:02:21 +0000 Subject: [PATCH 13/14] docs: update meta.yml --- modules/nf-core/limma/differential/meta.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/nf-core/limma/differential/meta.yml b/modules/nf-core/limma/differential/meta.yml index 3f652710e01..3a4c7e545c7 100644 --- a/modules/nf-core/limma/differential/meta.yml +++ b/modules/nf-core/limma/differential/meta.yml @@ -25,24 +25,24 @@ input: - contrast_variable: type: string description: | - The column in the sample sheet that should be used to define groups for + (Optional, required if reference and target are used) The column in the sample sheet that should be used to define groups for comparison - reference: type: string description: | - The value within the contrast_variable column of the sample sheet that + (Optional, required if contrast_variable and target are used) The value within the contrast_variable column of the sample sheet that should be used to derive the reference samples - target: type: string description: | - The value within the contrast_variable column of the sample sheet that + (Optional, required if contrast_variable and reference are used) The value within the contrast_variable column of the sample sheet that should be used to derive the target samples - formula: type: string - description: (Mandatory) R formula string used for modeling, e.g. '~ treatment + (1 | sample_number)'. + description: (Optional, requires comparison if used) R formula string used for modeling, e.g. '~ treatment'. - comparison: type: string - description: (Optional) Literal string passed to `limma::makeContrasts`, e.g. 'treatmenthND6 - treatmentmCherry'. + description: (Optional, mandatory if formula is used) Literal string passed to `limma::makeContrasts`, e.g. 'treatmentmCherry'. - - meta2: type: map description: | From d8672c366f259af5c57ca5274b5cbf95811501a0 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 23 May 2025 16:30:09 +0000 Subject: [PATCH 14/14] style: print only "meta.id" instead of full meta --- modules/nf-core/limma/differential/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/limma/differential/main.nf b/modules/nf-core/limma/differential/main.nf index e753ad41ad6..0d40e8654c2 100644 --- a/modules/nf-core/limma/differential/main.nf +++ b/modules/nf-core/limma/differential/main.nf @@ -1,5 +1,5 @@ process LIMMA_DIFFERENTIAL { - tag "$meta" + tag "$meta.id" label 'process_single' conda "${moduleDir}/environment.yml"