From 5daf6e85df1073b4a1235201399c3afed3939fdd Mon Sep 17 00:00:00 2001 From: james hadfield Date: Tue, 16 May 2023 20:19:37 +1200 Subject: [PATCH 1/3] Do not produce duplicate coloring entries The logic for creating colorings involves many steps, and when adding a coloring we had neglected to check if it had already been added. For instance, colorings explicitly defined in the auspice-config which were provided via node-data files (rather than the metadata tsv) would be added twice. In our current nCoV build there are 7 such duplicates! There were also a few duplicates that were present in our tests. Whoops! These weren't a problem for auspice (the first was used), but added unnecessary complexity and bytes to the JSON. Closes #719 --- augur/export_v2.py | 16 ++++++++++------ tests/functional/export_v2/dataset1.json | 24 ------------------------ tests/functional/export_v2/dataset2.json | 24 ------------------------ tests/functional/export_v2/dataset3.json | 24 ------------------------ 4 files changed, 10 insertions(+), 78 deletions(-) diff --git a/augur/export_v2.py b/augur/export_v2.py index 47ec01e63..d380823b9 100644 --- a/augur/export_v2.py +++ b/augur/export_v2.py @@ -355,12 +355,16 @@ def _add_title_and_type(coloring): return False # a warning message will have been printed before `InvalidOption` is raised return coloring - def _create_coloring(key): + def _add_coloring(colorings, key): # handle deprecations if key == "authors": deprecated("[colorings] The 'authors' key is now called 'author'") key = "author" - return {"key": key} + # check if the key has already been added by another part of the color-creating logic + if key in {x['key'] for x in colorings}: + return False + colorings.append({"key": key}) + return True def _is_valid(coloring): key = coloring["key"] @@ -389,18 +393,18 @@ def _get_colorings(): if command_line_colorings: # start with auto_colorings (already validated to be included) for x in auto_colorings: - colorings.append(_create_coloring(x)) + _add_coloring(colorings, x) # then add in command line colorings for x in command_line_colorings: - colorings.append(_create_coloring(x)) + _add_coloring(colorings, x) else: # if we have a config file, start with these (extra info, such as title&type, is added in later) if config: for x in config.keys(): - colorings.append(_create_coloring(x)) + _add_coloring(colorings, x) # then add in any auto-colorings already validated to include for x in auto_colorings: - colorings.append(_create_coloring(x)) + _add_coloring(colorings, x) explicitly_defined_colorings = [x["key"] for x in colorings] # add in genotype as a special case if (a) not already set and (b) the data supports it diff --git a/tests/functional/export_v2/dataset1.json b/tests/functional/export_v2/dataset1.json index 5e0153068..90cd07330 100644 --- a/tests/functional/export_v2/dataset1.json +++ b/tests/functional/export_v2/dataset1.json @@ -69,30 +69,6 @@ ], "title": "Mutations per branch", "type": "continuous" - }, - { - "key": "location", - "legend": [ - { - "display": "\u03b1", - "value": "alpha" - }, - { - "value": "beta" - } - ], - "scale": [ - [ - "beta", - "#bd0026" - ], - [ - "gamma", - "#6a51a3" - ] - ], - "title": "Location", - "type": "categorical" } ], "filters": [ diff --git a/tests/functional/export_v2/dataset2.json b/tests/functional/export_v2/dataset2.json index 526d5e218..f65b5429c 100644 --- a/tests/functional/export_v2/dataset2.json +++ b/tests/functional/export_v2/dataset2.json @@ -69,30 +69,6 @@ ], "title": "Mutations per branch", "type": "continuous" - }, - { - "key": "location", - "legend": [ - { - "display": "\u03b1", - "value": "alpha" - }, - { - "value": "beta" - } - ], - "scale": [ - [ - "beta", - "#bd0026" - ], - [ - "gamma", - "#6a51a3" - ] - ], - "title": "Location", - "type": "categorical" } ], "filters": [ diff --git a/tests/functional/export_v2/dataset3.json b/tests/functional/export_v2/dataset3.json index f965d94c3..21159083c 100644 --- a/tests/functional/export_v2/dataset3.json +++ b/tests/functional/export_v2/dataset3.json @@ -82,30 +82,6 @@ "#4042C7" ] ] - }, - { - "key": "location", - "title": "Location", - "type": "categorical", - "scale": [ - [ - "beta", - "#bd0026" - ], - [ - "gamma", - "#6a51a3" - ] - ], - "legend": [ - { - "value": "alpha", - "display": "\u03b1" - }, - { - "value": "beta" - } - ] } ], "filters": [ From c4ff0fc7cf751c255aeec0a0d9e51aa5c8ddbe5d Mon Sep 17 00:00:00 2001 From: james hadfield Date: Tue, 16 May 2023 20:32:12 +1200 Subject: [PATCH 2/3] Restore clades to top entry in colorings The intention of the coloring logic is that if an auspice-config provides the clade_membership key then it is exported at that position in the colorings list. If clade_membership is not explicitly set in the config (but is present in a node-data file) then we have (for a very long time) added it as the very first entry in the colorings list. PR #728 (augur v22.0.0) erroneously modified the behavior of the second case described above, which has now been restored by this commit. --- augur/export_v2.py | 9 ++++----- .../functional/export_v2/dataset-with-branch-labels.json | 8 ++++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/augur/export_v2.py b/augur/export_v2.py index d380823b9..a1fea99ea 100644 --- a/augur/export_v2.py +++ b/augur/export_v2.py @@ -361,10 +361,8 @@ def _add_coloring(colorings, key): deprecated("[colorings] The 'authors' key is now called 'author'") key = "author" # check if the key has already been added by another part of the color-creating logic - if key in {x['key'] for x in colorings}: - return False - colorings.append({"key": key}) - return True + if key not in {x['key'] for x in colorings}: + colorings.append({"key": key}) def _is_valid(coloring): key = coloring["key"] @@ -798,8 +796,9 @@ def node_data_prop_is_normal_trait(name): "authors", # authors are set as a node property, not a trait property "author", # see above "vaccine", # vaccine info is stored as a "special" node prop + 'clade_membership', # explicitly set as a coloring if present 'branch_length', - 'num_date', + 'num_date', # explicitly set as a coloring if present 'raw_date', 'numdate', 'clock_length', diff --git a/tests/functional/export_v2/dataset-with-branch-labels.json b/tests/functional/export_v2/dataset-with-branch-labels.json index d09dfb61a..4bcedc468 100644 --- a/tests/functional/export_v2/dataset-with-branch-labels.json +++ b/tests/functional/export_v2/dataset-with-branch-labels.json @@ -25,13 +25,13 @@ }, "colorings": [ { - "key": "gt", - "title": "Genotype", + "key": "clade_membership", + "title": "Clade", "type": "categorical" }, { - "key": "clade_membership", - "title": "Clade", + "key": "gt", + "title": "Genotype", "type": "categorical" } ], From 9e9d4a40c53e84c79d9bf0e6889a599f36605192 Mon Sep 17 00:00:00 2001 From: james hadfield Date: Tue, 16 May 2023 22:09:52 +1200 Subject: [PATCH 3/3] changelog --- CHANGES.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index ef05ce573..9784d96aa 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,9 +4,15 @@ ### Bug fixes +* export: No longer export duplicate entries in the colorings array, a bug which has been present in Augur since at least v12 [#719][]. [#1218][] (@jameshadfield) + +* export: In version 22.0.0, some configurations of export may have resulted in the clade coloring appearing last in the Auspice dropdown rather than first. This is now fixed. [#1218] (@jameshadfield) + * export: In version 22.0.0, validation of `augur.utils.read_node_data` was changed to error when a node data JSON did not contain any actual data. This causes export to error when an empty node data JSON is passed, as for example in ncov's pathogen-ci. This is now fixed by warning instead. The bug was originally introduced in PR [#728][]. [#1214][] (@corneliusroemer) +[#719]: https://github.com/nextstrain/augur/issues/719 [#1214]: https://github.com/nextstrain/augur/pull/1214 +[#1218]: https://github.com/nextstrain/augur/pull/1218 ## 22.0.0 (9 May 2023)