8000 Improve backsub input validation and testing by jmuhlich · Pull Request #83 · nf-core/mcmicro · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Improve backsub input validation and testing #83

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions assets/schema_input_cycle.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,18 @@
"sample": {
"type": "string",
"pattern": "^\\S+$",
"errorMessage": "sample name must be provided and cannot contain spaces"
"errorMessage": "sample name must be provided and cannot contain spaces",
"meta": "id"
},
"cycle_number": {
"type": "integer",
"errorMessage": "cycle_number must be provided. It should be 1-based, sequential and have no gaps"
"errorMessage": "cycle_number must be provided. It should be 1-based, sequential and have no gaps",
"meta": "cycle_number"
},
"channel_count": {
"type": "integer",
"errorMessage": "channel_count name must be provided. It should be 1-based, sequential and have no gaps"
"errorMessage": "channel_count name must be provided. It should be 1-based, sequential and have no gaps",
"meta": "channel_count"
},
"image_tiles": {
"type": "string",
Expand Down
3 changes: 2 additions & 1 deletion assets/schema_input_sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
"sample": {
"type": "string",
"pattern": "^\\S+$",
"errorMessage": "Sample name must be provided and cannot contain spaces"
"errorMessage": "Sample name must be provided and cannot contain spaces",
"meta": "id"
},
"image_directory": {
"type": "string",
Expand Down
34 changes: 25 additions & 9 deletions assets/schema_marker.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,41 +10,57 @@
"channel_number": {
"type": "integer",
"minimum": 1,
"errorMessage": "channel_number must be provided"
"errorMessage": "channel_number must be provided",
"meta": "channel_number"
},
"cycle_number": {
"type": "integer",
"minimum": 1,
"errorMessage": "cycle_number must be provided"
"errorMessage": "cycle_number must be provided",
"meta": "cycle_number"
},
"marker_name": {
"type": "string",
"pattern": "\\S",
"errorMessage": "marker_name must be provided"
"errorMessage": "marker_name must be provided",
"meta": "marker_name"
},
"filter": {
"type": "string",
"default": null,
"pattern": "\\S",
"errorMessage": ""
"errorMessage": "",
"meta": "filter"
},
"excitation_wavelength": {
"type": "integer",
"errorMessage": ""
"default": null,
"errorMessage": "",
"meta": "excitation_wavelength"
},
"emission_wavelength": {
"type": "integer",
"errorMessage": ""
"default": null,
"errorMessage": "",
"meta": "emission_wavelength"
},
"exposure": {
"type": "number",
"default": null,
"minimum": 0.001,
"errorMessage": "Exposure time must be a positive number."
"errorMessage": "Exposure time must be a positive number.",
"meta": "exposure"
},
"background": {
"type": "string"
"type": "string",
"default": null,
"pattern": "\\S",
"meta": "background"
},
"remove": {
"type": "boolean"
"type": "boolean",
"default": null,
"meta": "remove"
}
},
"required": ["channel_number", "cycle_number", "marker_name"]
Expand Down
80 changes: 37 additions & 43 deletions subworkflows/local/utils_nfcore_mcmicro_pipeline/main.nf
F438
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,6 @@ workflow PIPELINE_INITIALISATION {
//
if (input_cycle) {
ch_samplesheet = Channel.fromList(samplesheetToList(params.input_cycle, "${projectDir}/assets/schema_input_cycle.json"))
.map{
sample, cycle_number, channel_count, image_tiles, dfp, ffp ->
[
[id: sample, cycle_number: cycle_number, channel_count: channel_count],
image_tiles,
dfp,
ffp
]
}
.dump(tag: 'ch_samplesheet (cycle)')
} else if (input_sample) {
ch_samplesheet = Channel.fromList(samplesheetToList(params.input_sample, "${projectDir}/assets/schema_input_sample.json"))
Expand All @@ -93,7 +84,8 @@ workflow PIPELINE_INITIALISATION {
}

ch_markersheet = Channel.fromList(samplesheetToList(params.marker_sheet, "${projectDir}/assets/schema_marker.json"))
.toList()
// Extract only the meta-maps since we mark all fields as meta.
.collect({ it[0] }, flat: false)
.map{ validateInputMarkersheet(it) }
.dump(tag: 'ch_markersheet')

Expand Down Expand Up @@ -192,54 +184,56 @@ def validateInputMarkersheet( markersheet_data ) {
def cycle_number_list = []

markersheet_data.each { row ->
def (channel_number, cycle_number, marker_name) = row

if (marker_name_list.contains(marker_name)) {
error("Duplicate marker name found in marker sheet!")
if (marker_name_list.contains(row.marker_name)) {
error("Please check input markersheet -> duplicate marker name '${row.marker_name}'")
} else {
marker_name_list.add(marker_name)
marker_name_list.add(row.marker_name)
}

if (channel_number_list && (channel_number != channel_number_list[-1] && channel_number != channel_number_list[-1] + 1)) {
error("Channel_number cannot skip values and must be in order!")
if (channel_number_list && (row.channel_number != channel_number_list[-1] && row.channel_number != channel_number_list[-1] + 1)) {
error("Please check input markersheet -> channel_number is not consecutive for marker '${row.marker_name}'")
} else {
channel_number_list.add(channel_number)
channel_number_list.add(row.channel_number)
}

if (cycle_number_list && (cycle_number != cycle_number_list[-1] && cycle_number != cycle_number_list[-1] + 1)) {
error("Cycle_number cannot skip values and must be in order!")
if (cycle_number_list && (row.cycle_number != cycle_number_list[-1] && row.cycle_number != cycle_number_list[-1] + 1)) {
error("Please check input markersheet -> cycle_number is not consecutive for marker '${row.marker_name}'")
} else {
cycle_number_list.add(cycle_number)
cycle_number_list.add(row.cycle_number)
}
}

// uniqueness of (channel, cycle) tuple in marker sheet
def test_tuples = [channel_number_list, cycle_number_list].transpose()
def dups = test_tuples.countBy{ it }.findAll{ _, count -> count > 1 }*.key
if (dups) {
error("Duplicate [channel, cycle] pairs: ${dups}")
error("Please check input markersheet -> duplicate [channel, cycle] pairs: ${dups}")
}

// validate backsub columns if present
def exposure_list = markersheet_data.findResults{ _1, _2, _3, _4, _5, _6, exposure, _8, _9 -> exposure ?: null }
def background_list = markersheet_data.findResults{ _1, _2, _3, _4, _5, _6, _7, background, _9 -> background ?: null }
def remove_list = markersheet_data.findResults{ _1, _2, _3, _4, _5, _6, _7, _8, remove -> remove ?: null }

if (!background_list && (exposure_list || remove_list)) {
error("No values in background column, but values in either exposure or remove columns. Must have background column values to perform background subtraction.")
} else if (background_list) {
inter_list = marker_name_list.intersect(background_list)
if (inter_list.size() != background_list.size()) {
outliers_list = background_list - inter_list
error('background column values must exist in the marker_name column. The following background column values do not exist in the marker_name column: ' + outliers_list)
// Validate backsub data
if (!params.backsub) {
def backsub_columns = ['exposure', 'background', 'remove']
if (markersheet_data*.subMap(backsub_columns)*.collect{ c, v -> v != null }.flatten().any()) {
Comment on lines +216 to +217
Copy link
Contributor
@kbestak kbestak May 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the remove column is provided, params.backsub is false, if just a warning is passed, MCQUANT would fail due to channel removal logic. Adding an error targeting this case

Suggested change
def backsub_columns = ['exposure', 'background', 'remove']
if (markersheet_data*.subMap(backsub_columns)*.collect{ c, v -> v != null }.flatten().any()) {
def backsub_columns = ['exposure', 'background']
if (markersheet_data*.subMap('remove')*.collect{ c, v -> v != null }.flatten().any()) {
error("The remove column is present in the marker sheet, but params.backsub is set to false. Please set params.backsub to true or remove TRUE values from the remove column.")
}
else if (markersheet_data*.subMap(backsub_columns)*.collect{ c, v -> v != null }.flatten().any()) {

log.warn("One or more of the ${backsub_columns.join('/')} columns are present in the marker sheet, but params.backsub is set to false. Subtraction will NOT be performed unless params.backsub is set to true.")
}

if (!exposure_list) {
error('You must have at least one value in the exposure column to perform background subtraction')
} else {
def markers_used_as_background = markersheet_data.findResults{ it.background }.toSet()
if (!markers_used_as_background) {
error("Please check input markersheet -> Backsub is enabled, but no background channels have been defined so subtraction can't proceed. Either set params.backsub=false or specify how the channel subtraction should be performed.")
}

if (!remove_list) {
error ('You must have at least one value in the remove column to perform background subtraction')
markersheet_data.each { row ->
if (row.background) {
if (!row.exposure) {
error("Please check input markersheet -> Missing exposure value for marker: '${row.marker_name}'")
}
if (row.background !in marker_name_list) {
error("Please check input markersheet -> Unknown background '${row.background}' for marker '${row.marker_name}' (background must be the name of another marker in this sheet).")
}
}
if (row.marker_name in markers_used_as_background && !row.exposure) {
error("Please check input markersheet -> Missing exposure value for marker used as background: '${row.marker_name}'")
}
}
}

Expand All @@ -248,7 +242,7 @@ def validateInputMarkersheet( markersheet_data ) {

def validateInputSamplesheetMarkersheet ( samples, markers ) {
def sample_cycles = samples.collect{ meta, image_tiles, dfp, ffp -> meta.cycle_number }
def marker_cycles = markers.collect{ channel_number, cycle_number, marker_name, _1, _2, _3, _4, _5, _6 -> cycle_number }
def marker_cycles = markers.collect{ meta -> meta.cycle_number }

if (marker_cycles.unique(false) != sample_cycles.unique(false) ) {
error("cycle_number values must match between sample and marker sheets")
Expand All @@ -258,7 +252,7 @@ def validateInputSamplesheetMarkersheet ( samples, markers ) {

def channel_cycle_map = samples.collect{ meta, image_tiles, dfp, ffp -> [meta.id,meta.cycle_number] }.groupBy{ it[0] }
channel_cycle_map.each { entry ->
last_val = -1
def last_val = -1
entry.value.collect{ it[1] }.each{ curr_val ->
if (last_val != -1 && (curr_val > (last_val + 1) || curr_val <= last_val)) {
error("cycle_number values must be increasing with no gaps")
Expand All @@ -269,7 +263,7 @@ def validateInputSamplesheetMarkersheet ( samples, markers ) {
}

def expandSampleRow( row ) {
def (sample, image_directory, dfp, ffp) = row
def (meta, image_directory, dfp, ffp) = row
def files = []

file(image_directory).eachFileRecurse (FileType.FILES) {
Expand All @@ -279,7 +273,7 @@ def expandSampleRow( row ) {
}

return files.withIndex(1).collect{ f, i ->
[[id: sample, cycle_number: i], f, dfp, ffp]
[meta + [cycle_number: i], f, dfp, ffp]
}
}
//
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
nextflow_function {

name "Validation behavior for pipeline inputs"
script "../main.nf"

test("Error on backsub if background column is empty") {
function "validateInputMarkersheet"
when {
params {
backsub = true
}
function {
"""
input[0] = [
[marker_name:"M1", cycle_number:1, channel_number:1, exposure: 100],
[marker_name:"M2", cycle_number:1, channel_number:2, remove: true],
]
"""
}
}
then {
assert function.failed
assert function.stdout*.contains("set params.backsub=false").any()
}
}

test("Warn if backsub false but columns are provided") {
function "validateInputMarkersheet"
when {
params {
backsub = false
}
function {
"""
input[0] = [
[marker_name:"M1", cycle_number:1, channel_number:1, exposure: 100],
[marker_name:"M2", cycle_number:1, channel_number:2, remove: true],
Copy link
Contributor

Choose a reason for 10000 hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If adding error for params.backsub = false and remove values added since this would now fail istead of warn

Suggested change
[marker_name:"M2", cycle_number:1, channel_number:2, remove: true],
[marker_name:"M2", cycle_number:1, channel_number:2, background:"M1"],

]
"""
}
}
then {
assert function.success
// log.warn output can't be tested yet - https://github.com/askimed/nf-test/issues/271
//assert function.stdout*.contains("Subtraction will NOT be performed").any()
}
}

Copy link
Contributor
@kbestak kbestak May 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If adding error for params.backsub = false and remove values added

Suggested change
test("Error if backsub false but remove is provided") {
function "validateInputMarkersheet"
when {
params {
backsub = true
}
function {
"""
input[0] = [
[marker_name:"M1", cycle_number:1, channel_number:1, remove: true],
]
"""
}
}
then {
assert function.failed
assert function.stdout*.contains("The remove column is present in the marker sheet").any()
}
}

test("Error on backsub if background value not in marker_name") {
function "validateInputMarkersheet"
when {
params {
backsub = true
}
function {
"""
input[0] = [
[marker_name:"M1", cycle_number:1, channel_number:1, exposure:1, background: "M3"],
]
"""
}
}
then {
assert function.failed
assert function.stdout*.contains("Unknown background").any()
}
}

test("Error on backsub if exposure time missing for foreground channel") {
function "validateInputMarkersheet"
when {
params {
backsub = true
}
function {
"""
input[0] = [
[marker_name:"M1", cycle_number:1, channel_number:1, background:"M2"],
[marker_name:"M2", cycle_number:1, channel_number:2, exposure:1],
]
"""
}
}
then {
assert function.failed
assert function.stdout*.contains("Missing exposure value for marker:").any()
}
}

test("Error on backsub if exposure time missing for background channel") {
function "validateInputMarkersheet"
when {
params {
backsub = true
}
function {
"""
input[0] = [
[marker_name:"M1", cycle_number:1, channel_number:1, exposure:1, background:"M2"],
[marker_name:"M2", cycle_number:1, channel_number:2],
]
"""
}
}
then {
assert function.failed
assert function.stdout*.find("Missing exposure.*background:").any()
}
}

}
Loading
Loading
0