@@ -48,7 +48,7 @@ rule curate:
48
48
annotations = config ["curate" ]["annotations" ],
49
49
manual_mapping = "defaults/host_hostgenus_hosttype_map.tsv" ,
50
50
output :
51
- metadata = "data/raw_metadata_curated .tsv" ,
51
+ metadata = "data/all_metadata .tsv" ,
52
52
sequences = "results/sequences.fasta" ,
53
53
log :
54
54
"logs/curate.txt" ,
@@ -68,7 +68,7 @@ rule curate:
68
68
authors_default_value = config ["curate" ]["authors_default_value" ],
69
69
abbr_authors_field = config ["curate" ]["abbr_authors_field" ],
70
70
annotations_id = config ["curate" ]["annotations_id" ],
71
- metadata_columns = config ["curate" ]["metadata_columns " ],
71
+ added_columns = config ["curate" ]["added_columns " ],
72
72
id_field = config ["curate" ]["output_id_field" ],
73
73
sequence_field = config ["curate" ]["output_sequence_field" ],
74
74
shell :
@@ -98,7 +98,7 @@ rule curate:
98
98
| ./scripts/transform-state-names \
99
99
| ./scripts/post_process_metadata.py \
100
100
| ./scripts/add-field-names \
101
- --metadata-columns {params.metadata_columns } \
101
+ --metadata-columns {params.added_columns } \
102
102
| ./scripts/transform-new-fields \
103
103
--map-tsv {input.manual_mapping} \
104
104
--map-id host \
@@ -113,12 +113,31 @@ rule curate:
113
113
--output-id-field {params.id_field} \
114
114
--output-seq-field {params.sequence_field} ) 2>> {log}
115
115
"""
116
rule add_metadata_columns:
    """Add derived columns to the curated metadata table.

    Notable columns:
    - [NEW] url: URL linking to the NCBI GenBank record
      ('https://www.ncbi.nlm.nih.gov/nuccore/*'), built by appending the value
      of the accession column to the nuccore URL prefix.
    """
    input:
        metadata="data/all_metadata.tsv",
    output:
        # temp(): intermediate table; Snakemake deletes it once the rules that
        # consume it have finished.
        metadata=temp("data/all_metadata_added.tsv"),
    params:
        # Name of the metadata column that holds the GenBank accession; it is
        # substituted into the csvtk expression as a column reference.
        accession=config["curate"]["genbank_accession"],
    # In the command below, `${{{params.accession}}}` is rendered by Snakemake
    # as `${<column name>}`: the doubled braces are literal braces and the
    # inner pair is the placeholder. The braced form is csvtk's column
    # reference syntax that also works when the column name contains spaces
    # or other special characters; the expression is single-quoted so the
    # shell does not try to expand it.
    shell:
        """
        csvtk mutate2 -t \
            -n url \
            -e '"https://www.ncbi.nlm.nih.gov/nuccore/" + ${{{params.accession}}}' \
            {input.metadata} \
            > {output.metadata}
        """
116
135
117
136
rule subset_metadata :
118
137
input :
119
- metadata = "data/raw_metadata_curated .tsv" ,
138
+ metadata = "data/all_metadata_added .tsv" ,
120
139
output :
121
- metadata = "data/raw_metadata .tsv" ,
140
+ metadata = "data/subset_metadata .tsv" ,
122
141
params :
123
142
metadata_fields = "," .join (config ["curate" ]["metadata_columns" ]),
124
143
shell :
0 commit comments