Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sv benchmark params are exposed #34

Merged
merged 6 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@
Supported SV callers: Manta, SVaba, Dragen, Delly, Lumpy ..
Available Truth samples: HG002, SEQC2

- If you have unresolved SVs, it is recommended to use only truvari with --pctseq 0.

- The size filtering parameters provided by truvari and svbenchmark do not apply in the same way. That is why using them through the pipeline is not recommended; variants can instead be filtered safely during the VCF normalization steps.

- Please note that it is not possible to use exactly the same parameters for different benchmarking methods.

<!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.
Explain what rows and columns represent. For instance (please edit as appropriate):

Expand Down
4 changes: 2 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ process {
}
withName: "TRUVARI_BENCH" {
ext.prefix = {"${params.sample}"}
ext.args = {"--pctsize 0.5 --pctovl 0.5 --refdist 1000 --pick ac"}
ext.args = {"--pctsize $params.pctsize --pctovl $params.pctovl --pctseq $params.pctseq --refdist $params.refdist --pick $params.pick --chunksize $params.chunksize"}
ext.when = { params.method.split(',').contains('truvari') }
publishDir = [
path: {"${params.outdir}/${meta.id}/truvari_bench"},
Expand All @@ -165,7 +165,7 @@ process {
}
withName: SVANALYZER_SVBENCHMARK {
ext.prefix = {"${params.sample}"}
ext.args = {"-normshift 0.3 –normdist 0.3 –normsizediff 0.3"}
ext.args = {"-normshift $params.normshift –normdist $params.normdist –normsizediff $params.normsizediff -maxdist $params.maxdist"}
ext.when = { params.method.split(',').contains('svanalyzer') }
publishDir = [
path: {"${params.outdir}/${meta.id}/svanalyzer_bench"},
Expand Down
21 changes: 17 additions & 4 deletions conf/test_hg38.config
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,26 @@ params {

// Processes
analysis = 'germline' //somatic
method = 'truvari,svanalyzer,wittyer' // --not working for now : vcfdist
method = 'truvari,svanalyzer' // --not working for now : vcfdist
similarity = 0 // determines the sequence similarity level in benchmarking.
preprocess = "normalization, deduplication"
min_sv_size = 50
variant_filtering = "include" // null, include, exclude
expression = 'FILTER="PASS"'

//variant_filtering = "include" // null, include, exclude
//expression = 'FILTER="PASS"'

//truvari benchmark parameters
pctsize = 0.3
pctseq = 0 // has to be 0 for unresolved variants to be benchmarked or when --dup-to-ins is used
pctovl = 0
refdist = 100000
chunksize = 100000
pick = "ac"

//svanalyzer benchmark parameters
normshift = 0.3
normdist = 0.3
normsizediff = 0.3
maxdist = 100000

sample = "HG002" // available samples: SEQC2, HG002
truth_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz"
Expand Down
13 changes: 13 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,19 @@ params {
variant_filtering = null // null, include, exclude
expression = null

//truvari benchmark parameters
pctsize = 0.7
pctseq = 0.7 // has to be 0 for unresolved variants to be benchmarked or when --dup-to-ins is used
pctovl = 0
refdist = 500
chunksize = 500
pick = "single"

//svanalyzer benchmark parameters
normshift = 0.2
normdist = 0.2
normsizediff = 0.2
maxdist = 100000

// References
genome = null
Expand Down
61 changes: 50 additions & 11 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -94,67 +94,106 @@
},
"analysis": {
"type": "string",
"format": "directory-path",
"description": "",
"fa_icon": "fas fa-folder-open"
},
"preprocess": {
"type": "string",
"format": "directory-path",
"description": "",
"fa_icon": "fas fa-folder-open"
},
"sv_standardization": {
"type": "string",
"format": "directory-path",
"description": "",
"fa_icon": "fas fa-folder-open"
},
"similarity": {
"type": "integer",
"format": "directory-path",
"description": "",
"fa_icon": "fas fa-folder-open"
},
"method": {
"type": "string",
"format": "directory-path",
"description": "",
"fa_icon": "fas fa-folder-open"
},
"min_sv_size": {
"type": "integer",
"format": "directory-path",
"description": "Maximum SV size of variants to benchmark, 0 to disable , Default:50",
"fa_icon": "fas fa-folder-open"
},
"max_sv_size": {
"type": "integer",
"format": "directory-path",
"description": "Maximum SV size of variants to benchmark, -1 to disable , Default:-1",
"fa_icon": "fas fa-folder-open"
},
"min_allele_freq": {
"type": "number",
"format": "directory-path",
"description": "Minimum Alele Frequency of variants to benchmark, Use -1 to disable , Default:-1",
"fa_icon": "fas fa-folder-open"
},
"min_num_reads": {
"type": "integer",
"format": "directory-path",
"description": "Minimum number of read supporting variants to benchmark, Use, -1 to disable , Default:-1",
"fa_icon": "fas fa-folder-open"
},
"pctsize": {
"type": "number",
"description": "TRUVARI PARAMETER. Ratio of min(base_size, comp_size)/max(base_size, comp_size)",
"fa_icon": "fas fa-folder-open"
},
"refdist": {
"type": "integer",
"description": "TRUVARI PARAMETER. Maximum distance comparison calls must be within from base call's start/end",
"fa_icon": "fas fa-folder-open"
},
"chunksize": {
"type": "integer",
"description": "TRUVARI PARAMETER.",
"fa_icon": "fas fa-folder-open"
},
"pctseq": {
"type": "number",
"description": "TRUVARI PARAMETER. Edit distance ratio between the REF/ALT haplotype sequences of base and comparison call.",
"fa_icon": "fas fa-folder-open"
},
"pctovl": {
"type": "number",
"description": "TRUVARI PARAMETER. Ratio of two calls' (overlapping bases)/(longest span).",
"fa_icon": "fas fa-folder-open"
},
"pick": {
"type": "string",
"description": "TRUVARI PARAMETER.How many matches a variant is allowed to participate in is controlled: single,ac,multi",
"fa_icon": "fas fa-folder-open"
},
"normshift": {
"type": "number",
"description": "SVANALYZER PARAMETER. Disallow matches if alignments between alternate alleles have normalized shift greater than normshift (default 0.2) ",
"fa_icon": "fas fa-folder-open"
},
"normdist": {
"type": "number",
"description": "SVANALYZER PARAMETER. Disallow matches if alternate alleles have normalized edit distance greater than normdist (default 0.2)",
"fa_icon": "fas fa-folder-open"
},
"normsizediff": {
"type": "number",
"description": "SVANALYZER PARAMETER. Disallow matches if alternate alleles have normalized size difference greater than normsizediff (default 0.2) ",
"fa_icon": "fas fa-folder-open"
},
"maxdist": {
"type": "integer",
"description": "SVANALYZER PARAMETER. Disallow matches if positions of two variants are more than maxdist bases from each other (default 100,000).",
"fa_icon": "fas fa-folder-open"
},
"variant_filtering": {
"type": "string",
"format": "directory-path",
"description": "Use either exclude or include to enable variant filtering using bcftools expressions, Default:null",
"fa_icon": "fas fa-folder-open"
},
"expression": {
"type": "string",
"format": "directory-path",
"description": "Use bcftools expressions here https://samtools.github.io/bcftools/bcftools.html#expressions. This must be coupled with variant_expression, Default:null",
"fa_icon": "fas fa-folder-open"
},
Expand Down
Loading