From 21a2704c65ef23f509117744a6a78cc013eb6c33 Mon Sep 17 00:00:00 2001
From: LouisLeNezet
Date: Wed, 10 Jul 2024 12:58:11 +0200
Subject: [PATCH] Add checkFileIndex

---
Review notes (this section is ignored when the patch is applied):
- Line breaks and indentation were reconstructed; whitespace on context lines
  may differ from the target file, so apply with --ignore-whitespace if needed.
- The blob ids on the "index" lines are those of the original patch and no
  longer describe the resulting files.
- checkFileIndex: file_ext / index_ext are now declared with "def" so they are
  local to the closure.
- getFileExtension: only a trailing ".gz" is stripped, so "x.fa.gz.gzi" is no
  longer read as "fai".
- checkChr: "&" replaced by the short-circuit "&&".

 .../utils_nfcore_phaseimpute_pipeline/main.nf |  78 ++++++--
 .../tests/function.nf.test                    | 184 ++++++++++++++++++
 2 files changed, 243 insertions(+), 19 deletions(-)
 create mode 100644 subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test

diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf
index 45c3f1b9..96b68b6b 100644
--- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf
@@ -248,6 +248,11 @@ workflow PIPELINE_INITIALISATION {
     checkChr(chr_regions, extractChr(ch_hap_legend), "hap legend files")
     checkChr(chr_regions, extractChr(ch_posfile), "position")
 
+    // Check that all input files have the correct index
+    checkFileIndex(ch_input)
+    checkFileIndex(ch_input_truth)
+    checkFileIndex(ch_panel)
+    checkFileIndex(ch_genome)
 
     emit:
     input = ch_input // [ [meta], file, index ]
@@ -383,43 +388,78 @@ def checkChr(chr_a, chr_b, name){
         .combine(chr_b)
         .map{
             a, b ->
-            if (!b == [null] & !(a - b).isEmpty()) {
+            if (b != [null] && !(a - b).isEmpty()) {
                 error "Chr : ${a - b} is missing from ${name}"
             }
         }
 }
 
+//
+// Get file extension, ignoring a trailing ".gz" (e.g. "x.vcf.gz" -> "vcf").
+// Accepts a String or a Path; an empty list (no file given) yields null.
+// Any other input (non-empty list, other type) raises an error.
+//
+def getFileExtension(file) {
+    // Strip only a trailing ".gz": removing every ".gz" would turn "x.fa.gz.gzi" into "x.fai"
+    if (file instanceof String) {
+        return file.replaceAll('\\.gz$', "").split("\\.").last()
+    } else if (file instanceof Path) {
+        return file.getName().replaceAll('\\.gz$', "").split("\\.").last()
+    } else if (file instanceof ArrayList) {
+        if (file == []) {
+            return null
+        } else {
+            error "Array not supported"
+        }
+    } else {
+        error "Type not supported: ${file.getClass()}"
+    }
+}
+
 //
 // Check if all input files have the same extension
 //
 def getAllFilesExtension(ch_input) {
     files_ext = ch_input
-        .map {
-            if (it[1] instanceof String) {
-                return it[1].split("\\.").last()
-            } else if (it[1] instanceof Path) {
-                return it[1].getName().split("\\.").last()
-            } else if (it[1] instanceof ArrayList) {
-                if (it[1] == []) {
-                    return null
-                } else {
-                    error "Array not supported"
-                }
-            } else {
-                println it[1].getClass()
-                error "Type not supported"
-            }
-        } // Extract files extensions
+        .map { getFileExtension(it[1]) } // Extract files extensions
         .toList() // Collect extensions into a list
         .map { extensions ->
             if (extensions.unique().size() != 1) {
-                println "Extensions: ${extensions}"
-                error "All input files must have the same extension"
+                error "All input files must have the same extension: ${extensions.unique()}"
             }
             return extensions[0]
         }
 }
 
+//
+// Check correspondence file / index
+// For every [meta, file, index] item, fail when the index extension does not
+// match the file type: vcf/bcf -> tbi|csi, bam -> bai, cram -> crai, fa/fasta -> fai.
+// A missing index ([]) has no extension and is reported as an error for these types.
+//
+def checkFileIndex(ch_input) {
+    ch_input
+        .map {
+            meta, file, index ->
+            // "def" keeps these local to the closure; undeclared names become shared script-level variables
+            def file_ext = getFileExtension(file)
+            def index_ext = getFileExtension(index)
+            if (file_ext in ["vcf", "bcf"] && !(index_ext in ["tbi", "csi"]) ) {
+                error "${meta}: Index file for [.vcf, .vcf.gz, bcf] must have the extension [.tbi, .csi]"
+            }
+            if (file_ext == "bam" && index_ext != "bai") {
+                error "${meta}: Index file for .bam must have the extension .bai"
+            }
+            if (file_ext == "cram" && index_ext != "crai") {
+                error "${meta}: Index file for .cram must have the extension .crai"
+            }
+            if (file_ext in ["fa", "fasta"] && index_ext != "fai") {
+                error "${meta}: Index file for [fa, fasta] must have the extension .fai"
+            }
+        }
+    return null
+}
+
 //
 // Validate channels from input samplesheet
 //
diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test
new file mode 100644
index 00000000..c23330d9
--- /dev/null
+++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test
@@ -0,0 +1,184 @@
+nextflow_function {
+
+    name "Test function phaseimpute"
+    script "../main.nf"
+    tag "function"
+
+    test("Test checkFileIndex no error") {
+        function "checkFileIndex"
+        tag "checkFileIndex"
+        when {
+            function {
+                """
+                input[0] = channel.fromList([
+                    [[id: "input"], file("input.vcf"), file("input.csi")],
+                    [[id: "input2"], file("input2.vcf"), file("input2.tbi")],
+                    [[id: "input3"], file("input3.bcf"), file("input3.csi")],
+                    [[id: "input4"], file("input4.bcf"), file("input4.tbi")],
+                    [[id: "input5"], file("input5.vcf.gz"), file("input5.csi")],
+                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
+                    [[id: "input7"], file("input7.bam"), file("input5.bai")],
+                    [[id: "input8"], file("input8.cram"), file("input6.crai")],
+                    [[id: "input9"], file("input9.fa"), file("input9.fai")],
+                    [[id: "input10"], file("input10.fasta"), file("input10.fai")]
+                ])
+                """
+            }
+        }
+        then {
+            assert function.success
+        }
+    }
+    test("Test checkFileIndex bam bai") {
+        function "checkFileIndex"
+        tag "checkFileIndex"
+        when {
+            function {
+                """
+                input[0] = channel.fromList([
+                    [[id: "input7"], file("input7.bam"), file("input5.csi")],
+                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
+                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
+                ])
+                """
+            }
+        }
+        then {
+            assert function.failed
+            assert function.stdout.contains("[id:input7]: Index file for .bam must have the extension .bai")
+        }
+    }
+    test("Test checkFileIndex cram crai") {
+        function "checkFileIndex"
+        tag "checkFileIndex"
+        when {
+            function {
+                """
+                input[0] = channel.fromList([
+                    [[id: "input7"], file("input7.cram"), []],
+                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
+                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
+                ])
+                """
+            }
+        }
+        then {
+            assert function.failed
+            assert function.stdout.contains("[id:input7]: Index file for .cram must have the extension .crai")
+        }
+    }
+    test("Test checkFileIndex bcf csi") {
+        function "checkFileIndex"
+        tag "checkFileIndex"
+        when {
+            function {
+                """
+                input[0] = channel.fromList([
+                    [[id: "input7"], file("input7.bcf"), file("input7.txt")],
+                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
+                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
+                ])
+                """
+            }
+        }
+        then {
+            assert function.failed
+            assert function.stdout.contains("[id:input7]: Index file for [.vcf, .vcf.gz, bcf] must have the extension [.tbi, .csi]")
+        }
+    }
+    test("Test checkFileIndex vcf csi") {
+        function "checkFileIndex"
+        tag "checkFileIndex"
+        when {
+            function {
+                """
+                input[0] = channel.fromList([
+                    [[id: "input7"], file("input7.vcf"), file("input7.bai")],
+                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
+                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
+                ])
+                """
+            }
+        }
+        then {
+            assert function.failed
+            assert function.stdout.contains("[id:input7]: Index file for [.vcf, .vcf.gz, bcf] must have the extension [.tbi, .csi]")
+        }
+    }
+    test("Test checkFileIndex vcf.gz csi") {
+        function "checkFileIndex"
+        tag "checkFileIndex"
+        when {
+            function {
+                """
+                input[0] = channel.fromList([
+                    [[id: "input7"], file("input7.vcf.gz"), []],
+                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
+                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
+                ])
+                """
+            }
+        }
+        then {
+            assert function.failed
+            assert function.stdout.contains("[id:input7]: Index file for [.vcf, .vcf.gz, bcf] must have the extension [.tbi, .csi]")
+        }
+    }
+    test("Test checkFileIndex fa fai") {
+        function "checkFileIndex"
+        tag "checkFileIndex"
+        when {
+            function {
+                """
+                input[0] = channel.fromList([
+                    [[id: "input7"], file("input7.fa"), []],
+                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
+                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
+                ])
+                """
+            }
+        }
+        then {
+            assert function.failed
+            assert function.stdout.contains("[id:input7]: Index file for [fa, fasta] must have the extension .fai")
+        }
+    }
+    test("Test checkFileIndex fasta fai") {
+        function "checkFileIndex"
+        tag "checkFileIndex"
+        when {
+            function {
+                """
+                input[0] = channel.fromList([
+                    [[id: "input7"], file("input7.fasta"), file("input6.fia")],
+                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
+                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
+                ])
+                """
+            }
+        }
+        then {
+            assert function.failed
+            assert function.stdout.contains("[id:input7]: Index file for [fa, fasta] must have the extension .fai")
+        }
+    }
+    test("Test checkFileIndex fa.gz gzi") {
+        function "checkFileIndex"
+        tag "checkFileIndex"
+        when {
+            function {
+                """
+                input[0] = channel.fromList([
+                    [[id: "input7"], file("input7.fa.gz"), file("input7.fa.gz.gzi")],
+                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
+                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
+                ])
+                """
+            }
+        }
+        then {
+            assert function.failed
+            assert function.stdout.contains("[id:input7]: Index file for [fa, fasta] must have the extension .fai")
+        }
+    }
+}