Skip to content

Commit 0473e15

Browse files
authored
Merge pull request #771 from nf-core/dev
Release 2.11.0
2 parents 3f40a1b + 61e8bc7 commit 0473e15

14 files changed

+265
-135
lines changed

CHANGELOG.md

+23
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,29 @@
33
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
44
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
55

6+
## nf-core/ampliseq version 2.11.0 - 2024-08-06
7+
8+
### `Added`
9+
10+
- [#765](https://github.com/nf-core/ampliseq/pull/765) - Added version R09-RS220 of curated GTDB 16S taxonomy: `sbdi-gtdb=R09-RS220-1` or `sbdi-gtdb` as parameter to `--dada_ref_taxonomy`
11+
- [#766](https://github.com/nf-core/ampliseq/pull/766) - Added version 10 of Unite as parameter for `--sintax_ref_taxonomy`: `unite-fungi=10.0` and `unite-alleuk=10.0`
12+
13+
### `Changed`
14+
15+
- [#762](https://github.com/nf-core/ampliseq/pull/762) - Improved output documentation section "Optional ASV filtering" and parameter documentation
16+
- [#766](https://github.com/nf-core/ampliseq/pull/766) - Modified warning filenames from `QIIME2_ANCOM` to avoid collisions
17+
- [#766](https://github.com/nf-core/ampliseq/pull/766), [#769](https://github.com/nf-core/ampliseq/pull/769) - Disabled Unite databases from the `--qiime_ref_taxonomy` because of divergent results compared to the other classifiers
18+
19+
### `Fixed`
20+
21+
- [#761](https://github.com/nf-core/ampliseq/pull/761) - Some sample sheet checks were not applied due to changes in the metadata ["meta"] structure in version 2.9.0
22+
- [#766](https://github.com/nf-core/ampliseq/pull/766) - Fixed broken URLs for Unite databases (issue [#764](https://github.com/nf-core/ampliseq/issues/764))
23+
- [#769](https://github.com/nf-core/ampliseq/pull/769) - Reference taxonomy database values were not properly validated in versions 2.9.0 and 2.10.0
24+
25+
### `Dependencies`
26+
27+
### `Removed`
28+
629
## nf-core/ampliseq version 2.10.0 - 2024-06-27
730

831
### `Added`

assets/multiqc_config.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
report_comment: >
2-
This report has been generated by the <a href="https://github.com/nf-core/ampliseq/releases/tag/2.10.0" target="_blank">nf-core/ampliseq</a>
2+
This report has been generated by the <a href="https://github.com/nf-core/ampliseq/releases/tag/2.11.0" target="_blank">nf-core/ampliseq</a>
33
analysis pipeline. For information about how to interpret these results, please see the
4-
<a href="https://nf-co.re/ampliseq/2.10.0/docs/output" target="_blank">documentation</a>.
4+
<a href="https://nf-co.re/ampliseq/2.11.0/docs/output" target="_blank">documentation</a>.
55
report_section_order:
66
"nf-core-ampliseq-methods-description":
77
order: -1000

assets/report_template.Rmd

+1-1
Original file line numberDiff line numberDiff line change
@@ -1544,7 +1544,7 @@ for (folder in ancom) {
15441544
any_ancombc <- !isFALSE(params$ancombc) || !isFALSE(params$ancombc_formula)
15451545
```
15461546

1547-
```{r, eval = !isFALSE(params$any_ancombc), results='asis'}
1547+
```{r, eval = !isFALSE(any_ancombc), results='asis'}
15481548
cat(paste0("
15491549
## ANCOM-BC
15501550

bin/taxref_reformat_sintax.sh bin/taxref_reformat_sintax_fasta.sh

-1
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,3 @@
55
# Just rename the preformatted file
66
# Assumes only one (gzipped) file
77
mv * sintaxdb.fa.gz
8-

bin/taxref_reformat_sintax_tar.sh

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/sh
2+
3+
# Handles preformatted database tar files suitable for sintax
4+
#
5+
# This turned out to be a MISTAKE and is NOT USED, but I'm keeping the file for a while anyway.
6+
7+
# Extract the fasta file without _dev in its name
8+
f=$(tar tfz *.tgz | grep fasta | grep -v '_dev')
9+
tar xzf *.tgz $f
10+
11+
# Change the name and gzip
12+
mv $f sintaxdb.fa
13+
gzip sintaxdb.fa

conf/ref_databases.config

+107-60
Large diffs are not rendered by default.

conf/test_its_dada_taxonomy.config

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3+
Nextflow config file for running minimal tests
4+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5+
Defines input files and everything required to run a fast and simple pipeline test.
6+
7+
Use as follows:
8+
nextflow run nf-core/ampliseq -profile test_its_dada_taxonomy,<docker/singularity> --outdir <OUTDIR>
9+
10+
----------------------------------------------------------------------------------------
11+
*/
12+
13+
params {
14+
config_profile_name = 'Test sintax profile'
15+
config_profile_description = 'Minimal test dataset to check pipeline function for ITS data with the DADA2 taxonomy classifier'
16+
17+
// Limit resources so that this can run on GitHub Actions
18+
max_cpus = 2
19+
max_memory = '12.GB'
20+
max_time = '6.h'
21+
22+
// Input data
23+
FW_primer = "CTTGGTCATTTAGAGGAAGTAA"
24+
RV_primer = "TCCTGAGGGAAACTTCG"
25+
input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_pacbio_ITS.tsv"
26+
metadata = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Metadata_pacbio_ITS.tsv"
27+
pacbio = true
28+
max_ee = 12
29+
cut_its = "its2"
30+
31+
skip_dada_taxonomy = false
32+
dada_ref_taxonomy = "unite-fungi"
33+
34+
//this is to remove low abundance ASVs to reduce runtime of downstream processes
35+
min_samples = 2
36+
min_frequency = 10
37+
38+
//produce average barplots
39+
metadata_category_barplot = "var2,var3"
40+
41+
//restrict ANCOM analysis to higher taxonomic levels
42+
tax_agglom_max = 4
43+
ancom = true
44+
45+
sbdiexport = true
46+
47+
qiime_adonis_formula = "var2"
48+
49+
diversity_rarefaction_depth = 500
50+
}

docs/output.md

+6-4
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
2323
- [Cutadapt](#cutadapt) - Primer trimming
2424
- [MultiQC](#multiqc) - Aggregate report describing results
2525
- [ASV inferrence with DADA2](#asv-inferrence-with-dada2) - Infer Amplicon Sequence Variants (ASVs)
26-
- [Optional ASV filtering](#optional-asv-filtering) - Filter ASVs to optimize downstream analysis
26+
- [Optional ASV post processing](#optional-asv-post-processing) - Filter ASVs to optimize downstream analysis
2727
- [VSEARCH cluster](#vsearch-cluster) - Centroid fasta file, filtered asv table, and stats
2828
- [Barrnap](#barrnap) - Predict ribosomal RNA sequences and optional filtering
2929
- [Length filter](#length-filter) - Optionally, ASV can be filtered by length thresholds
@@ -163,7 +163,9 @@ For binned quality scores in NovaSeq data, monotonicity in the fitted error mode
163163

164164
</details>
165165

166-
### Optional ASV filtering
166+
### Optional ASV post processing
167+
168+
ASV post-processing takes place after DADA2's ASV computation (i.e. after chimera removal, for example table `ASV_tax.tsv`) but _before_ taxonomic classification. Post-processing will affect all downstream files. Clustering and filters are applied sequentially, in the same sequence as shown here. All filters are off by default and can be enabled by setting thresholds as detailed in the parameter documentation.
167169

168170
#### VSEARCH cluster
169171

@@ -184,7 +186,7 @@ This directory will hold the centroid fasta file, the filtered asv count table (
184186

185187
Barrnap predicts the location of ribosomal RNA genes in genomes, here it can be used to discriminate rRNA sequences from potential contamination. It supports bacteria (5S,23S,16S), archaea (5S,5.8S,23S,16S), metazoan mitochondria (12S,16S) and eukaryotes (5S,5.8S,28S,18S).
186188

187-
Optionally, ASV sequences can be filtered for rRNA sequences identified by Barrnap with `--filter_ssu` that can take a list of abbreviations of the above supported categories (kingdoms), e.g. `bac,arc,mito,euk`. This filtering takes place after DADA2's ASV computation (i.e. after chimera removal) but _before_ taxonomic classification (also applies to above mentioned taxonomic classification with DADA2, i.e. files `ASV_tax.tsv` & `ASV_tax_species.tsv`).
189+
Optionally, ASV sequences can be filtered for rRNA sequences identified by Barrnap with `--filter_ssu` that can take a list of abbreviations of the above supported categories (kingdoms), e.g. `bac,arc,mito,euk`.
188190

189191
<details markdown="1">
190192
<summary>Output files</summary>
@@ -200,7 +202,7 @@ Optionally, ASV sequences can be filtered for rRNA sequences identified by Barrn
200202

201203
#### Length filter
202204

203-
Optionally, a length filter can be used to reduce potential contamination after ASV computation. For example with 515f and 806r primers the majority of 16S rRNA amplicon sequences should have a length of 253 bp and amplicons vary significantely are likely spurious.
205+
Optionally, a length filter can be used to reduce potential contamination after ASV computation. For example with 515f and 806r primers the majority of 16S rRNA amplicon sequences should have a length of 253 bp and amplicons that differ significantly from this are likely spurious.
204206

205207
The minimum ASV length threshold can be set by `--min_len_asv` and the maximum length threshold with `--max_len_asv`. If no threshold is set, the filter (and output) is omitted.
206208

docs/usage.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,8 @@ Pre-configured reference taxonomy databases are:
229229
| greengenes | - | - | + | (+)² | 16S rRNA |
230230
| greengenes2 | - | - | - | + | 16S rRNA |
231231
| pr2 | + | - | - | - | 18S rRNA |
232-
| unite-fungi | + | + | - | + | eukaryotic nuclear ribosomal ITS region |
233-
| unite-alleuk | + | + | - | + | eukaryotic nuclear ribosomal ITS region |
232+
| unite-fungi | + | + | - | - | eukaryotic nuclear ribosomal ITS region |
233+
| unite-alleuk | + | + | - | - | eukaryotic nuclear ribosomal ITS region |
234234
| coidb | + | + | - | - | eukaryotic Cytochrome Oxidase I (COI) |
235235
| midori2-co1 | + | - | - | - | eukaryotic Cytochrome Oxidase I (COI) |
236236
| phytoref | + | - | - | - | eukaryotic plastid 16S rRNA |

modules/local/qiime2_ancom_tax.nf

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ process QIIME2_ANCOM_TAX {
4343
--to-tsv
4444
4545
if [ \$(grep -v '^#' -c ${table.baseName}-level-${taxlevel}.feature-table.tsv) -lt 2 ]; then
46-
echo ${taxlevel} > ancom/\"WARNING Summing your data at taxonomic level ${taxlevel} produced less than two rows (taxa), ANCOM can't proceed -- did you specify a bad reference taxonomy?\".txt
46+
echo ${taxlevel} > ancom/\"WARNING ${table.baseName} Summing your data at taxonomic level ${taxlevel} produced less than two rows (taxa), ANCOM can't proceed -- did you specify a bad reference taxonomy?\".txt
4747
else
4848
qiime composition add-pseudocount \\
4949
--i-table lvl${taxlevel}-${table} \\

nextflow.config

+17-16
Original file line numberDiff line numberDiff line change
@@ -284,21 +284,22 @@ profiles {
284284
executor.cpus = 4
285285
executor.memory = 8.GB
286286
}
287-
test { includeConfig 'conf/test.config' }
288-
test_single { includeConfig 'conf/test_single.config' }
289-
test_multi { includeConfig 'conf/test_multi.config' }
290-
test_doubleprimers { includeConfig 'conf/test_doubleprimers.config' }
291-
test_pacbio_its { includeConfig 'conf/test_pacbio_its.config' }
292-
test_iontorrent { includeConfig 'conf/test_iontorrent.config' }
293-
test_fasta { includeConfig 'conf/test_fasta.config' }
294-
test_failed { includeConfig 'conf/test_failed.config' }
295-
test_full { includeConfig 'conf/test_full.config' }
296-
test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' }
297-
test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' }
298-
test_novaseq { includeConfig 'conf/test_novaseq.config' }
299-
test_pplace { includeConfig 'conf/test_pplace.config' }
300-
test_sintax { includeConfig 'conf/test_sintax.config' }
301-
test_multiregion { includeConfig 'conf/test_multiregion.config' }
287+
test { includeConfig 'conf/test.config' }
288+
test_single { includeConfig 'conf/test_single.config' }
289+
test_multi { includeConfig 'conf/test_multi.config' }
290+
test_doubleprimers { includeConfig 'conf/test_doubleprimers.config' }
291+
test_pacbio_its { includeConfig 'conf/test_pacbio_its.config' }
292+
test_iontorrent { includeConfig 'conf/test_iontorrent.config' }
293+
test_fasta { includeConfig 'conf/test_fasta.config' }
294+
test_failed { includeConfig 'conf/test_failed.config' }
295+
test_full { includeConfig 'conf/test_full.config' }
296+
test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' }
297+
test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' }
298+
test_novaseq { includeConfig 'conf/test_novaseq.config' }
299+
test_pplace { includeConfig 'conf/test_pplace.config' }
300+
test_sintax { includeConfig 'conf/test_sintax.config' }
301+
test_its_dada_taxonomy { includeConfig 'conf/test_its_dada_taxonomy.config' }
302+
test_multiregion { includeConfig 'conf/test_multiregion.config' }
302303
}
303304

304305
// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
@@ -356,7 +357,7 @@ manifest {
356357
description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2"""
357358
mainScript = 'main.nf'
358359
nextflowVersion = '!>=23.04.0'
359-
version = '2.10.0'
360+
version = '2.11.0'
360361
doi = '10.5281/zenodo.1493841,10.3389/fmicb.2020.550420'
361362
}
362363

nextflow_schema.json

+21-28
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@
150150
"primer_removal": {
151151
"title": "Primer removal",
152152
"type": "object",
153+
"description": "Spurious sequences sometimes lack primer sequences and primers introduce errors that can be removed in that step",
153154
"default": "",
154155
"properties": {
155156
"retain_untrimmed": {
@@ -188,7 +189,7 @@
188189
"read_trimming_and_quality_filtering": {
189190
"title": "Read trimming and quality filtering",
190191
"type": "object",
191-
"description": "",
192+
"description": "Read trimming and quality filtering is supposed to reduce spurious results and aid error correction",
192193
"default": "",
193194
"properties": {
194195
"trunclenf": {
@@ -271,6 +272,7 @@
271272
"asv_post_processing": {
272273
"title": "ASV post processing",
273274
"type": "object",
275+
"description": "ASV post-processing takes place after ASV computation but before taxonomic assignment, it will affect all downstream processes",
274276
"default": "",
275277
"properties": {
276278
"vsearch_cluster": {
@@ -370,21 +372,22 @@
370372
"rdp",
371373
"rdp=18",
372374
"sbdi-gtdb",
373-
"sbdi-gtdb=R06-RS202-1",
374-
"sbdi-gtdb=R06-RS202-3",
375-
"sbdi-gtdb=R07-RS207-1",
375+
"sbdi-gtdb=R09-RS220-1",
376376
"sbdi-gtdb=R08-RS214-1",
377+
"sbdi-gtdb=R07-RS207-1",
378+
"sbdi-gtdb=R06-RS202-3",
379+
"sbdi-gtdb=R06-RS202-1",
377380
"silva",
378381
"silva=132",
379382
"silva=138",
380383
"unite-alleuk",
381-
"unite-alleuk=8.2",
382-
"unite-alleuk=8.3",
383384
"unite-alleuk=9.0",
385+
"unite-alleuk=8.3",
386+
"unite-alleuk=8.2",
384387
"unite-fungi",
385-
"unite-fungi=8.2",
386-
"unite-fungi=8.3",
387388
"unite-fungi=9.0",
389+
"unite-fungi=8.3",
390+
"unite-fungi=8.2",
388391
"zehr-nifh",
389392
"zehr-nifh=2.5.0"
390393
]
@@ -451,20 +454,7 @@
451454
"type": "string",
452455
"help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`) . This will download the desired database and initiate taxonomic classification with QIIME2 and the chosen database.\n\nIf both, `--dada_ref_taxonomy` and `--qiime_ref_taxonomy` are used, DADA2 classification will be used for downstream analysis.\n\nThe following databases are supported:\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n- Greengenes (only testing!)\n\nGenerally, using `silva`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version. For testing purposes, the tiny database `greengenes85` (dereplicated at 85% sequence similarity) is available. For details on what values are valid, please either use an invalid value such as `x` (causing the pipeline to send an error message with all valid values) or see `conf/ref_databases.config`.",
453456
"description": "Name of supported database, and optionally also version number",
454-
"enum": [
455-
"silva=138",
456-
"silva",
457-
"unite-fungi=8.3",
458-
"unite-fungi=8.2",
459-
"unite-fungi",
460-
"unite-alleuk=9.0",
461-
"unite-alleuk=8.3",
462-
"unite-alleuk=8.2",
463-
"unite-alleuk",
464-
"greengenes85",
465-
"greengenes2",
466-
"greengenes2=2022.10"
467-
]
457+
"enum": ["silva=138", "silva", "greengenes85", "greengenes2", "greengenes2=2022.10"]
468458
},
469459
"qiime_ref_tax_custom": {
470460
"type": "string",
@@ -517,14 +507,16 @@
517507
"enum": [
518508
"coidb",
519509
"coidb=221216",
510+
"unite-fungi",
511+
"unite-fungi=10.0",
520512
"unite-fungi=9.0",
521513
"unite-fungi=8.3",
522514
"unite-fungi=8.2",
523-
"unite-fungi",
515+
"unite-alleuk",
516+
"unite-alleuk=10.0",
524517
"unite-alleuk=9.0",
525518
"unite-alleuk=8.3",
526-
"unite-alleuk=8.2",
527-
"unite-alleuk"
519+
"unite-alleuk=8.2"
528520
]
529521
},
530522
"addsh": {
@@ -575,6 +567,7 @@
575567
"title": "ASV filtering",
576568
"type": "object",
577569
"default": "",
570+
"description": "Filtering by taxonomy or abundance will affect all downstream analysis",
578571
"fa_icon": "fas fa-filter",
579572
"properties": {
580573
"exclude_taxa": {
@@ -600,7 +593,7 @@
600593
"downstream_analysis": {
601594
"title": "Downstream analysis",
602595
"type": "object",
603-
"description": "",
596+
"description": "Metadata is used here to visualize data either for quality control or publication ready figures",
604597
"default": "",
605598
"fa_icon": "fas fa-bacteria",
606599
"properties": {
@@ -652,7 +645,7 @@
652645
"differential_abundance_analysis": {
653646
"title": "Differential abundance analysis",
654647
"type": "object",
655-
"description": "",
648+
"description": "Differential abundance analysis relies on provided metadata",
656649
"default": "",
657650
"fa_icon": "fas fa-bacteria",
658651
"properties": {
@@ -705,7 +698,7 @@
705698
"pipeline_report": {
706699
"title": "Pipeline summary report",
707700
"type": "object",
708-
"description": "",
701+
"description": "Customization of the pipeline report",
709702
"default": "",
710703
"properties": {
711704
"report_template": {

0 commit comments

Comments
 (0)