diff --git a/augur/clades.py b/augur/clades.py index 1a1e71d8c..677911ef2 100644 --- a/augur/clades.py +++ b/augur/clades.py @@ -7,19 +7,32 @@ import pandas as pd import numpy as np from collections import defaultdict +import networkx as nx +from itertools import islice from .utils import get_parent_name_by_child_name_for_tree, read_node_data, write_json, get_json_name def read_in_clade_definitions(clade_file): ''' Reads in tab-seperated file that defines clades by amino acid or nucleotide mutations + Inheritance is allowed, but needs to be acyclic. Alleles can be overwritten by inheriting clades. + + Sites are 1 indexed in the file, and are converted to 0 indexed in the output + + Empty lines are ignored, comments after # are ignored + Format ------ - clade gene site alt - Clade_1 ctpE 81 D - Clade_2 nuc 30642 T - Clade_3 nuc 444296 A - Clade_4 pks8 634 T + clade gene site alt + Clade_1 ctpE 81 D + Clade_2 nuc 30642 T + Clade_3 nuc 444296 A + Clade_3 S 1 P + \\# Clade_4 inherits from Clade_3 + Clade_4 clade Clade_3 + Clade_4 pks8 634 T + \\# Inherited allele can be overwritten + Clade_4 S 1 L Parameters ---------- @@ -32,13 +45,72 @@ def read_in_clade_definitions(clade_file): clade definitions as :code:`{clade_name:[(gene, site, allele),...]}` ''' - clades = defaultdict(list) - df = pd.read_csv(clade_file, sep='\t' if clade_file.endswith('.tsv') else ',') - for index, row in df.iterrows(): - allele = (row.gene, row.site-1, row.alt) - clades[row.clade].append(allele) - clades.default_factory = None - + clades = defaultdict(lambda: defaultdict(str)) + df = pd.read_csv( + clade_file, + sep='\t' if clade_file.endswith('.tsv') else ',', + comment='#' + ) + + clade_inheritance_rows = df[df['gene'] == 'clade'] + + # Identify clades that inherit more than once + clades_with_multiple_inheritance = clade_inheritance_rows[clade_inheritance_rows.duplicated(subset=["clade"])]['clade'].tolist() + if len(clades_with_multiple_inheritance) > 0: + raise ValueError(f"Clades 
{clades_with_multiple_inheritance} have multiple inheritance, that's not allowed") + + # Identify clades that inherit from non-existent clades + missing_parent_clades = set(clade_inheritance_rows['site']) - set(df["clade"]) + if len(missing_parent_clades) > 0: + raise ValueError(f"Clades {missing_parent_clades} are inherited from but are not defined") + + + G = nx.DiGraph() + + # Use integer 0 as root so as not to conflict with any string clade names + # String '0' can still be used this way + root = 0 + # For every clade, add edge from root as default + # This way all clades can be reached by traversal + for clade in df.clade.unique(): + G.add_edge(root, clade) + + # Build inheritance graph + # For clades that inherit, disconnect from root + # Add edge from parent + for _, row in clade_inheritance_rows.iterrows(): + G.remove_edge(root, row.clade) + G.add_edge(row.site, row.clade) + + if not nx.is_directed_acyclic_graph(G): + raise ValueError(f"Clade definitions contain cycles {list(nx.simple_cycles(G))}") + + # Traverse graph top down, so that children can inherit from parents and grandparents + # Topological sort ensures parents are visited before children + # islice is used to skip the root node (which has no parent) + for clade in islice(nx.topological_sort(G),1,None): + # Get name of parent clade + # G.predecessors(clade) returns iterator, thus next() necessary + # despite the fact that there should only be one parent + parent_clade = next(G.predecessors(clade)) + # Inheritance from parents happens here + # Allele dict is initialized with alleles from parent + clades[clade] = clades[parent_clade].copy() + for _, row in df[(df.clade == clade) & (df.gene != 'clade')].iterrows(): + # Overwrite of parent alleles is possible and happens here + clades[clade][(row.gene, int(row.site)-1)] = row.alt + + # Convert items from dict[str, dict[(str,int),str]] to dict[str, list[(str,int,str)]] + clades = { + clade: [ + gene_site + (alt,) + for gene_site, alt in 
clade_definition.items() + ] + for clade, clade_definition in clades.items() + # If clause avoids root (helper) from being emitted + if clade != root + } + return clades diff --git a/setup.py b/setup.py index ab0e9636e..907168ae7 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,7 @@ "bcbio-gff >=0.6.0, ==0.6.*", "biopython >=1.67, !=1.77, !=1.78", "jsonschema >=3.0.0, ==3.*", + "networkx >= 2.5, ==2.*", "packaging >=19.2", "pandas >=1.0.0, ==1.*", "phylo-treetime ==0.8.*", diff --git a/tests/data/clades/commented_clades.tsv b/tests/data/clades/commented_clades.tsv new file mode 100644 index 000000000..f9200539e --- /dev/null +++ b/tests/data/clades/commented_clades.tsv @@ -0,0 +1,7 @@ +clade gene site alt +# Comment +Clade_1 ctpE 81 D +# Comment +Clade_2 nuc 30642 T +Clade_3 nuc 444296 A# Comment +Clade_3 pks8 634 T diff --git a/tests/data/clades/empty_lines_clades.tsv b/tests/data/clades/empty_lines_clades.tsv new file mode 100644 index 000000000..30bcf1428 --- /dev/null +++ b/tests/data/clades/empty_lines_clades.tsv @@ -0,0 +1,8 @@ +clade gene site alt +Clade_1 ctpE 81 D +Clade_2 nuc 30642 T + +Clade_3 nuc 444296 A + + +Clade_3 pks8 634 T diff --git a/tests/data/clades/inherit_chained_clades.tsv b/tests/data/clades/inherit_chained_clades.tsv new file mode 100644 index 000000000..61bb1b96b --- /dev/null +++ b/tests/data/clades/inherit_chained_clades.tsv @@ -0,0 +1,6 @@ +clade gene site alt +Clade_1 ctpE 81 D +Clade_2 clade Clade_1 +Clade_2 nuc 30642 T +Clade_3 clade Clade_2 +Clade_3 pks8 634 T diff --git a/tests/data/clades/inherit_clades.tsv b/tests/data/clades/inherit_clades.tsv new file mode 100644 index 000000000..a7c370024 --- /dev/null +++ b/tests/data/clades/inherit_clades.tsv @@ -0,0 +1,5 @@ +clade gene site alt +Clade_1 ctpE 81 D +Clade_2 nuc 30642 T +Clade_3 clade Clade_2 +Clade_3 pks8 634 T diff --git a/tests/data/clades/inherit_cycle_clades.tsv b/tests/data/clades/inherit_cycle_clades.tsv new file mode 100644 index 000000000..06eeba0fd --- /dev/null
+++ b/tests/data/clades/inherit_cycle_clades.tsv @@ -0,0 +1,6 @@ +clade gene site alt +Clade_1 ctpE 81 D +Clade_2 nuc 30642 T +Clade_2 clade Clade_3 +Clade_3 clade Clade_2 +Clade_3 pks8 634 T diff --git a/tests/data/clades/multiple_inheritance_clades.tsv b/tests/data/clades/multiple_inheritance_clades.tsv new file mode 100644 index 000000000..3527f64fb --- /dev/null +++ b/tests/data/clades/multiple_inheritance_clades.tsv @@ -0,0 +1,6 @@ +clade gene site alt +Clade_1 ctpE 81 D +Clade_2 nuc 30642 T +Clade_3 clade Clade_1 +Clade_3 clade Clade_2 +Clade_3 pks8 634 T diff --git a/tests/data/clades/nonexistent_clade_inheritance_clades.tsv b/tests/data/clades/nonexistent_clade_inheritance_clades.tsv new file mode 100644 index 000000000..2d7a4650e --- /dev/null +++ b/tests/data/clades/nonexistent_clade_inheritance_clades.tsv @@ -0,0 +1,5 @@ +clade gene site alt +Clade_1 ctpE 81 D +Clade_2 nuc 30642 T +Clade_3 clade Clade_X +Clade_3 pks8 634 T diff --git a/tests/data/clades/self_inherit_clades.tsv b/tests/data/clades/self_inherit_clades.tsv new file mode 100644 index 000000000..787db3326 --- /dev/null +++ b/tests/data/clades/self_inherit_clades.tsv @@ -0,0 +1,5 @@ +clade gene site alt +Clade_1 ctpE 81 D +Clade_2 nuc 30642 T +Clade_3 clade Clade_3 +Clade_3 pks8 634 T diff --git a/tests/data/clades/simple_clades.tsv b/tests/data/clades/simple_clades.tsv new file mode 100644 index 000000000..16d3be0b1 --- /dev/null +++ b/tests/data/clades/simple_clades.tsv @@ -0,0 +1,5 @@ +clade gene site alt +Clade_1 ctpE 81 D +Clade_2 nuc 30642 T +Clade_3 nuc 444296 A +Clade_3 pks8 634 T \ No newline at end of file diff --git a/tests/functional/clades.t b/tests/functional/clades.t new file mode 100644 index 000000000..df9741894 --- /dev/null +++ b/tests/functional/clades.t @@ -0,0 +1,16 @@ +Integration tests for augur clades. + + $ pushd "$TESTDIR" > /dev/null + $ export AUGUR="../../bin/augur" + +Test augur clades with simple Zika input files and hierarchical clades. 
+ + $ ${AUGUR} clades \ + > --tree clades/tree.nwk \ + > --mutations clades/aa_muts.json clades/nt_muts_small.json \ + > --clades clades/clades.tsv \ + > --output-node-data "$TMP/clades.json" &>/dev/null + + $ python3 "$TESTDIR/../../scripts/diff_jsons.py" clades/clades.json "$TMP/clades.json" \ + > --exclude-paths "root['generated_by']" + {} \ No newline at end of file diff --git a/tests/functional/clades/aa_muts.json b/tests/functional/clades/aa_muts.json new file mode 100644 index 000000000..8a5953e30 --- /dev/null +++ b/tests/functional/clades/aa_muts.json @@ -0,0 +1,328 @@ +{ + "nodes": { + "BRA/2016/FC_6706": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "COL/FLR_00008/2015": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [ + "L169I", + "G292E" + ], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "Colombia/2016/ZC204Se": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [ + "N17K" + ] + } + }, + "DOM/2016/BB_0183": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [ + "D208G" + ], + "NS2A": [ + "L152M" + ], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [ + "I322V", + "Q650R", + "D878E" + ], + "PRO": [] + } + }, + "EcEs062_16": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "HND/2016/HU_ME59": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [ + "G100A" + ], + "NS2A": [], + "NS2B": [], + "NS3": [ + "M572L" + ], + "NS4A": [], + "NS4B": [], + "NS5": [ + "R525C" + ], + "PRO": [] + } + }, + "NODE_0000001": { + "aa_muts": { + 
"2K": [], + "CA": [ + "D107E" + ], + "ENV": [], + "MP": [], + "NS1": [ + "R324W" + ], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [ + "T833A" + ], + "PRO": [] + } + }, + "NODE_0000002": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [ + "M349V" + ], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "NODE_0000003": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "NODE_0000004": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "NODE_0000005": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "NODE_0000006": { + "aa_muts": {} + }, + "NODE_0000007": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "NODE_0000008": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "PAN/CDC_259359_V1_V3/2015": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [ + "M32I" + ], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "PRVABC59": { + "aa_muts": { + "2K": [], + "CA": [ + "I80T" + ], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [ + "A91V" + ], + "PRO": [] + } + }, + "VEN/UF_1/2016": { + "aa_muts": { + "2K": [], + "CA": [ + "E76D" + ], + "ENV": [], + "MP": [], + 
"NS1": [ + "T301P" + ], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [ + "A833T" + ], + "PRO": [] + } + }, + "ZKC2/2016": { + "aa_muts": { + "2K": [], + "CA": [ + "S109N" + ], + "ENV": [ + "K419R" + ], + "MP": [], + "NS1": [ + "R324Q" + ], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [ + "V114M", + "N624S", + "K670R" + ], + "PRO": [] + } + } + } +} diff --git a/tests/functional/clades/clades.json b/tests/functional/clades/clades.json new file mode 100644 index 000000000..bcb32caa4 --- /dev/null +++ b/tests/functional/clades/clades.json @@ -0,0 +1,66 @@ +{ + "generated_by": { + "program": "augur", + "version": "13.1.2" + }, + "nodes": { + "BRA/2016/FC_6706": { + "clade_membership": "unassigned" + }, + "COL/FLR_00008/2015": { + "clade_membership": "B.1" + }, + "Colombia/2016/ZC204Se": { + "clade_membership": "B" + }, + "DOM/2016/BB_0183": { + "clade_membership": "A" + }, + "EcEs062_16": { + "clade_membership": "A" + }, + "HND/2016/HU_ME59": { + "clade_membership": "A.1" + }, + "NODE_0000001": { + "clade_annotation": "B", + "clade_membership": "B" + }, + "NODE_0000002": { + "clade_membership": "A" + }, + "NODE_0000003": { + "clade_annotation": "A", + "clade_membership": "A" + }, + "NODE_0000004": { + "clade_annotation": "A.1", + "clade_membership": "A.1" + }, + "NODE_0000005": { + "clade_membership": "unassigned" + }, + "NODE_0000006": { + "clade_membership": "unassigned" + }, + "NODE_0000007": { + "clade_membership": "B" + }, + "NODE_0000008": { + "clade_annotation": "B.1", + "clade_membership": "B.1" + }, + "PAN/CDC_259359_V1_V3/2015": { + "clade_membership": "B" + }, + "PRVABC59": { + "clade_membership": "A.1" + }, + "VEN/UF_1/2016": { + "clade_membership": "B.1" + }, + "ZKC2/2016": { + "clade_membership": "A" + } + } +} \ No newline at end of file diff --git a/tests/functional/clades/clades.tsv b/tests/functional/clades/clades.tsv new file mode 100644 index 000000000..e2231c726 --- /dev/null +++ 
b/tests/functional/clades/clades.tsv @@ -0,0 +1,14 @@ +clade gene site alt + +A nuc 2517 C +A nuc 3972 T + +B CA 107 E +B NS1 324 W + +# Inherited, nested clade +A.1 clade A +A.1 nuc 5298 G + +B.1 clade B +B.1 nuc 2895 A \ No newline at end of file diff --git a/tests/functional/clades/nt_muts_small.json b/tests/functional/clades/nt_muts_small.json new file mode 100644 index 000000000..b7465e6e3 --- /dev/null +++ b/tests/functional/clades/nt_muts_small.json @@ -0,0 +1,248 @@ +{ + "nodes": { + "BRA/2016/FC_6706": { + "muts": [ + "T876C", + "C2646T", + "A4341G", + "C5988T", + "T6618C", + "A7116G", + "C7488T", + "T7506C", + "G8040A", + "A8820G", + "C9066T", + "T9717C" + ] + }, + "COL/FLR_00008/2015": { + "muts": [ + "T1506C", + "C2977A", + "G3347A", + "G5655A", + "T8445C" + ] + }, + "Colombia/2016/ZC204Se": { + "muts": [ + "C507A", + "C1233T", + "C2163T", + "T7843C", + "T10332C" + ] + }, + "DOM/2016/BB_0183": { + "muts": [ + "T489C", + "T2043C", + "T2358C", + "T2418C", + "C2700T", + "T2724C", + "T2823C", + "C2844T", + "A3095G", + "T3982A", + "C5232T", + "C5352T", + "T5823C", + "C6105T", + "C6261T", + "T6891C", + "T8028C", + "G8604A", + "A8614G", + "C9270T", + "G9294A", + "A9599G", + "T10284G", + "T10632C" + ] + }, + "EcEs062_16": { + "muts": [ + "T1275C", + "T1858C", + "T2409C", + "A2754G", + "A3780G", + "G4971T", + "C5532T", + "G5751A", + "G6453C", + "A6873G", + "C8232A", + "T8553C", + "C8850T", + "A9420G", + "G9933T", + "C10098T", + "A10347G", + "T10372C" + ] + }, + "HND/2016/HU_ME59": { + "muts": [ + "G2771C", + "G3600A", + "G3846A", + "T4716C", + "G4767A", + "T4888C", + "G5178A", + "A6310C", + "T6708C", + "C6720T", + "A6945G", + "C9223T", + "T9516C" + ] + }, + "NODE_0000001": { + "muts": [ + "T411A", + "T738C", + "C858T", + "G864T", + "C1381T", + "C3442T", + "A3894G", + "C5991T", + "C9279T", + "A10147G" + ] + }, + "NODE_0000002": { + "muts": [ + "A3517G", + "G6966A" + ] + }, + "NODE_0000003": { + "muts": [ + "T2517C", + "C3972T" + ] + }, + "NODE_0000004": { + 
"muts": [ + "A5298G" + ] + }, + "NODE_0000005": { + "muts": [] + }, + "NODE_0000006": { + "muts": [] + }, + "NODE_0000007": { + "muts": [ + "T1029C", + "T3705C" + ] + }, + "NODE_0000008": { + "muts": [ + "G2895A", + "T3804C", + "C4311T", + "T4605G", + "C6643T", + "T8862G" + ] + }, + "PAN/CDC_259359_V1_V3/2015": { + "muts": [ + "G23A", + "A35T", + "G40A", + "T2538C", + "A3750G", + "G3771A", + "G4302A", + "C7056T", + "T10344C" + ] + }, + "PRVABC59": { + "muts": [ + "T329C", + "T762C", + "G1170T", + "G1458A", + "A1482G", + "C1887T", + "C2838T", + "T3549C", + "T3576C", + "C4866T", + "T5070C", + "T5139C", + "T6612A", + "T6654C", + "T7077C", + "T7134C", + "C7488T", + "C7922T", + "T8835C", + "T9000C", + "C9225T", + "C9279T" + ] + }, + "VEN/UF_1/2016": { + "muts": [ + "A3C", + "G318T", + "G438T", + "C1233T", + "C1416T", + "A3373C", + "C8016T", + "G10147A" + ] + }, + "ZKC2/2016": { + "muts": [ + "T249C", + "G416A", + "C789T", + "A1158G", + "T2032C", + "T2076C", + "A2216G", + "C2637T", + "T3321C", + "G3443A", + "C3501T", + "C4428T", + "T4602A", + "A4671G", + "T4740C", + "T5286C", + "T5676C", + "T6405C", + "C6475T", + "T7026C", + "G7230A", + "C7500T", + "G7990A", + "C8145T", + "T8391C", + "T8652C", + "C8748T", + "A9417G", + "A9521G", + "T9595C", + "A9659G", + "A9711G", + "A9768G", + "A10375G", + "C10596T" + ] + } + } +} diff --git a/tests/functional/clades/tree.nwk b/tests/functional/clades/tree.nwk new file mode 100644 index 000000000..e7b44517a --- /dev/null +++ b/tests/functional/clades/tree.nwk @@ -0,0 +1 @@ 
+((Colombia/2016/ZC204Se:0.00105368,(PAN/CDC_259359_V1_V3/2015:0.00076051,(COL/FLR_00008/2015:0.00044440,VEN/UF_1/2016:0.00089377)NODE_0000008:0.00038502)NODE_0000007:0.00019253)NODE_0000001:0.00080159,(BRA/2016/FC_6706:0.00214920,(ZKC2/2016:0.00173693,(HND/2016/HU_ME59:0.00206150,PRVABC59:0.00135309)NODE_0000004:0.00013537,(EcEs062_16:0.00175918,DOM/2016/BB_0183:0.00184905)NODE_0000002:0.00021565)NODE_0000003:0.00013737)NODE_0000005:0.00019772)NODE_0000006:0.00100000; diff --git a/tests/test_clades.py b/tests/test_clades.py new file mode 100644 index 000000000..58fd7bfab --- /dev/null +++ b/tests/test_clades.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +import pytest +import pathlib +import networkx as nx + +from augur.clades import read_in_clade_definitions + +def test_read_in_clade_definitions_simple(): + clades = read_in_clade_definitions("tests/data/clades/simple_clades.tsv") + assert clades == { + 'Clade_1': [('ctpE', 80, 'D')], + 'Clade_2': [('nuc', 30641, 'T')], + 'Clade_3': [('nuc', 444295, 'A'), ('pks8', 633, 'T')] + } + +def test_read_in_clade_definitions_with_empty_lines(): + clades = read_in_clade_definitions("tests/data/clades/empty_lines_clades.tsv") + assert clades == { + 'Clade_1': [('ctpE', 80, 'D')], + 'Clade_2': [('nuc', 30641, 'T')], + 'Clade_3': [('nuc', 444295, 'A'), ('pks8', 633, 'T')] + } + +def test_read_in_clade_definitions_with_comments(): + clades = read_in_clade_definitions("tests/data/clades/commented_clades.tsv") + assert clades == { + 'Clade_1': [('ctpE', 80, 'D')], + 'Clade_2': [('nuc', 30641, 'T')], + 'Clade_3': [('nuc', 444295, 'A'), ('pks8', 633, 'T')] + } + +def test_read_in_clade_definitions_inherit_simple(): + clades = read_in_clade_definitions("tests/data/clades/inherit_clades.tsv") + assert clades == { + 'Clade_1': [('ctpE', 80, 'D')], + 'Clade_2': [('nuc', 30641, 'T')], + 'Clade_3': [('nuc', 30641, 'T'), ('pks8', 633, 'T')] + } + +def test_read_in_clade_definitions_inherit_chained(): + clades = 
read_in_clade_definitions("tests/data/clades/inherit_chained_clades.tsv") + assert clades == { + 'Clade_1': [('ctpE', 80, 'D')], + 'Clade_2': [('ctpE', 80, 'D'),('nuc', 30641, 'T')], + 'Clade_3': [('ctpE', 80, 'D'),('nuc', 30641, 'T'), ('pks8', 633, 'T')] + } + +def test_read_in_clade_definitions_inherit_cycle_error(): + with pytest.raises(ValueError): + read_in_clade_definitions("tests/data/clades/inherit_cycle_clades.tsv") + +def test_read_in_clade_definitions_multiple_inheritance_error(): + with pytest.raises(ValueError): + read_in_clade_definitions("tests/data/clades/multiple_inheritance_clades.tsv") + +def test_read_in_clade_definitions_inheritance_from_nonexistent_clade_error(): + with pytest.raises(ValueError): + read_in_clade_definitions("tests/data/clades/nonexistent_clade_inheritance_clades.tsv") + +def test_read_in_clade_definitions_inheritance_from_self_error(): + with pytest.raises(ValueError): + read_in_clade_definitions("tests/data/clades/self_inherit_clades.tsv")