Skip to content

Commit

Permalink
Add checkFileIndex
Browse files Browse the repository at this point in the history
  • Loading branch information
LouisLeNezet committed Jul 10, 2024
1 parent 022bce8 commit 21a2704
Show file tree
Hide file tree
Showing 2 changed files with 217 additions and 19 deletions.
71 changes: 52 additions & 19 deletions subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,11 @@ workflow PIPELINE_INITIALISATION {
checkChr(chr_regions, extractChr(ch_hap_legend), "hap legend files")
checkChr(chr_regions, extractChr(ch_posfile), "position")

// Check that all input files have the correct index
checkFileIndex(ch_input)
checkFileIndex(ch_input_truth)
checkFileIndex(ch_panel)
checkFileIndex(ch_genome)

emit:
input = ch_input // [ [meta], file, index ]
Expand Down Expand Up @@ -383,43 +388,71 @@ def checkChr(chr_a, chr_b, name){
.combine(chr_b)
.map{
a, b ->
if (!b == [null] & !(a - b).isEmpty()) {
if (b != [null] & !(a - b).isEmpty()) {
error "Chr : ${a - b} is missing from ${name}"
}
}
}

//
// Get file extension
//
// Returns the last dot-separated component of a file name, after dropping one
// trailing ".gz" compression suffix:
//   "x.vcf.gz" -> "vcf", "x.vcf.gz.tbi" -> "tbi", "x.fa.gz.gzi" -> "gzi"
//
// file : a String path, a Path, or an empty list (placeholder for "no file")
// Returns the extension as a String, or null when `file` is an empty list.
// Calls error for a non-empty list or for any other type.
//
def getFileExtension(file) {
    def name
    if (file instanceof String) {
        // Only the last path component matters, so dots in directory names are ignored
        name = file.substring(file.lastIndexOf("/") + 1)
    } else if (file instanceof Path) {
        name = file.getName()
    } else if (file instanceof ArrayList) {
        if (file == []) {
            return null
        }
        error "Array not supported"
    } else {
        error "Type not supported: ${file.getClass()}"
    }
    // Strip ".gz" only when it is the final suffix. Removing every ".gz" occurrence
    // would turn "x.fa.gz.gzi" into "x.fai" and report the wrong extension.
    return name.replaceFirst('\\.gz$', "").split("\\.").last()
}

//
// Check if all input files have the same extension
//
// ch_input : channel of tuples whose second element (it[1]) is the file to inspect
// Returns a channel emitting the single extension shared by all files.
// Calls error when the collected extensions are not all identical.
//
def getAllFilesExtension(ch_input) {
    return ch_input
        .map { getFileExtension(it[1]) } // Extract files extensions
        .toList() // Collect extensions into a list
        .map { extensions ->
            // unique(false) returns a new list and leaves `extensions` untouched
            def distinct = extensions.unique(false)
            if (distinct.size() != 1) {
                println "Extensions: ${extensions}"
                error "All input files must have the same extension: ${distinct}"
            }
            return extensions[0]
        }
}

//
// Check correspondence file / index
//
// ch_input : channel of [ meta, file, index ] tuples
// For each tuple, the index extension must match the file extension:
//   vcf / bcf (optionally gzipped) -> tbi or csi
//   bam                            -> bai
//   cram                           -> crai
//   fa / fasta                     -> fai
// Files with any other extension are not checked.
// Calls error on the first mismatch; always returns null.
//
def checkFileIndex(ch_input) {
    ch_input
        .map { meta, file, index ->
            // Declared with `def` so they stay local to this closure call;
            // undeclared names would be script-global and shared between items.
            def file_ext  = getFileExtension(file)
            def index_ext = getFileExtension(index)
            if (file_ext in ["vcf", "bcf"] && !(index_ext in ["tbi", "csi"])) {
                error "${meta}: Index file for [.vcf, .vcf.gz, bcf] must have the extension [.tbi, .csi]"
            }
            if (file_ext == "bam" && index_ext != "bai") {
                error "${meta}: Index file for .bam must have the extension .bai"
            }
            if (file_ext == "cram" && index_ext != "crai") {
                error "${meta}: Index file for .cram must have the extension .crai"
            }
            if (file_ext in ["fa", "fasta"] && index_ext != "fai") {
                error "${meta}: Index file for [fa, fasta] must have the extension .fai"
            }
        }
    return null
}

//
// Validate channels from input samplesheet
//
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// nf-test suite for the checkFileIndex helper defined in ../main.nf.
// Each test feeds a channel of [ meta, file, index ] tuples to the function.
// Failing cases put the offending tuple first and assert on its error message.
nextflow_function {

    name "Test function phaseimpute"
    script "../main.nf"
    tag "function"

    // Every supported file type paired with an accepted index extension
    test("Test checkFileIndex no error") {
        function "checkFileIndex"
        tag "checkFileIndex"
        when {
            function {
                """
                input[0] = channel.fromList([
                    [[id: "input"], file("input.vcf"), file("input.csi")],
                    [[id: "input2"], file("input2.vcf"), file("input2.tbi")],
                    [[id: "input3"], file("input3.bcf"), file("input3.csi")],
                    [[id: "input4"], file("input4.bcf"), file("input4.tbi")],
                    [[id: "input5"], file("input5.vcf.gz"), file("input5.csi")],
                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
                    [[id: "input7"], file("input7.bam"), file("input5.bai")],
                    [[id: "input8"], file("input8.cram"), file("input6.crai")],
                    [[id: "input9"], file("input9.fa"), file("input9.fai")],
                    [[id: "input10"], file("input10.fasta"), file("input10.fai")]
                ])
                """
            }
        }
        then {
            assert function.success
        }
    }

    // .bam with a .csi index must be rejected
    test("Test checkFileIndex bam bai") {
        function "checkFileIndex"
        tag "checkFileIndex"
        when {
            function {
                """
                input[0] = channel.fromList([
                    [[id: "input7"], file("input7.bam"), file("input5.csi")],
                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
                ])
                """
            }
        }
        then {
            assert function.failed
            assert function.stdout.contains("[id:input7]: Index file for .bam must have the extension .bai")
        }
    }

    // .cram with no index (empty list) must be rejected
    test("Test checkFileIndex cram crai") {
        function "checkFileIndex"
        tag "checkFileIndex"
        when {
            function {
                """
                input[0] = channel.fromList([
                    [[id: "input7"], file("input7.cram"), []],
                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
                ])
                """
            }
        }
        then {
            assert function.failed
            assert function.stdout.contains("[id:input7]: Index file for .cram must have the extension .crai")
        }
    }

    // .bcf with an unrelated index extension must be rejected
    test("Test checkFileIndex bcf csi") {
        function "checkFileIndex"
        tag "checkFileIndex"
        when {
            function {
                """
                input[0] = channel.fromList([
                    [[id: "input7"], file("input7.bcf"), file("input7.txt")],
                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
                ])
                """
            }
        }
        then {
            assert function.failed
            assert function.stdout.contains("[id:input7]: Index file for [.vcf, .vcf.gz, bcf] must have the extension [.tbi, .csi]")
        }
    }

    // .vcf with a .bai index must be rejected
    test("Test checkFileIndex vcf csi") {
        function "checkFileIndex"
        tag "checkFileIndex"
        when {
            function {
                """
                input[0] = channel.fromList([
                    [[id: "input7"], file("input7.vcf"), file("input7.bai")],
                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
                ])
                """
            }
        }
        then {
            assert function.failed
            assert function.stdout.contains("[id:input7]: Index file for [.vcf, .vcf.gz, bcf] must have the extension [.tbi, .csi]")
        }
    }

    // .vcf.gz with no index (empty list) must be rejected
    test("Test checkFileIndex vcf.gz csi") {
        function "checkFileIndex"
        tag "checkFileIndex"
        when {
            function {
                """
                input[0] = channel.fromList([
                    [[id: "input7"], file("input7.vcf.gz"), []],
                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
                ])
                """
            }
        }
        then {
            assert function.failed
            assert function.stdout.contains("[id:input7]: Index file for [.vcf, .vcf.gz, bcf] must have the extension [.tbi, .csi]")
        }
    }

    // .fa with no index (empty list) must be rejected
    test("Test checkFileIndex fa fai") {
        function "checkFileIndex"
        tag "checkFileIndex"
        when {
            function {
                """
                input[0] = channel.fromList([
                    [[id: "input7"], file("input7.fa"), []],
                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
                ])
                """
            }
        }
        then {
            assert function.failed
            assert function.stdout.contains("[id:input7]: Index file for [fa, fasta] must have the extension .fai")
        }
    }

    // .fasta with a misspelled index extension (.fia) must be rejected
    test("Test checkFileIndex fasta fai") {
        function "checkFileIndex"
        tag "checkFileIndex"
        when {
            function {
                """
                input[0] = channel.fromList([
                    [[id: "input7"], file("input7.fasta"), file("input6.fia")],
                    [[id: "input6"], file("input6.vcf.gz"), file("input6.tbi")],
                    [[id: "input8"], file("input8.bam"), file("input8.bai")]
                ])
                """
            }
        }
        then {
            assert function.failed
            assert function.stdout.contains("[id:input7]: Index file for [fa, fasta] must have the extension .fai")
        }
    }
}

0 comments on commit 21a2704

Please sign in to comment.