From 0bd4c64d1c6178168c047a0b6924104fd8569205 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 21 Mar 2024 10:50:33 -0400 Subject: [PATCH 01/51] Word smithing introduction --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e84dfc4..e0553e5 100644 --- a/README.md +++ b/README.md @@ -34,15 +34,15 @@ Table of contents generated with markdown-toc # Introduction -A common function for many tools in bacterial typing is performing similarity searching using NCBI [blast](https://blast.ncbi.nlm.nih.gov/Blast.cgi). Blast provides a robust command line interface for constructing and using databases for similarity searching and is ubiquitous. There are many typing applications where custom code is written around the blast command line interface to perform searches for a variety of downstream applications. For instance, identification of specific target sequences within an assembly to perform gene-by-gene phylogenetic analysis (MLST, cgMLST, wgMLST), antimicrobial resistance gene detection, virulence gene detection, and in silico predictions of phenotypes such as serotype is a major application within public health. The typical approach is to bundle the search-based logic with additional specialized logic for performing the desired analysis. +A common function for many tools in bacterial typing is performing similarity searching using NCBI [blast](https://blast.ncbi.nlm.nih.gov/Blast.cgi). Blast provides a robust command line interface for constructing and using databases for similarity searching and is ubiquitous. There are many typing applications where custom code is written around the blast command line interface to perform searches for various downstream applications. 
For instance, the identification of specific target sequences within an assembly to perform gene-by-gene phylogenetic analysis (MLST, cgMLST, wgMLST), antimicrobial resistance gene detection, virulence gene detection, and in silico predictions of phenotypes such as serotype is an important application within public health. The typical approach is bundling the search-based logic with additional specialized logic to perform the desired analysis. -Decentralized allele calling has become a pressing concern by public health laboratories due to the increased use of whole genome sequencing (WGS) as part of outbreak detection and surveillance of a variety of pathogens. Gene-by-gene approaches have a variety of benefits for species typing which include a standardized set of loci for estimating genetic similarity between samples. This standardization allows for interoperability between different groups and also has the benefit of compression, simplifying genetic comparisons to use a simple hamming distance based on allele identifiers instead of a whole sequence. However, a limitation of this approach is the requirement of a centralized authority to issue unique allele identifiers and this poses multiple problems for operationalization such as privacy and connectivity. Despite this limitation PulseNet International has adopted gene-by-gene analysis as its preferred analytical approach for estimating genetic similarity between samples for routine operations with the limitation that comparing between jurisdictions requires the sharing of the primary sequence data rather than the allele identifiers. +Decentralized allele calling has become a pressing concern for public health laboratories due to the increased use of whole genome sequencing (WGS) for outbreak detection and surveillance of various pathogens. Gene-by-gene approaches have a variety of benefits for species typing, including a standardized set of loci for estimating genetic similarity between samples. 
This standardization allows for interoperability between different groups. Also, it has the benefit of compression, simplifying genetic comparisons by using a simple hamming distance based on allele identifiers instead of a whole sequence. However, a limitation of this approach is the requirement of a centralized authority to issue unique allele identifiers, which poses multiple operational problems, such as privacy and connectivity. Despite this limitation, PulseNet International has adopted gene-by-gene analysis as its preferred analytical approach for estimating genetic similarity between samples for routine operations, with the limitation that comparing between jurisdictions requires the sharing of the primary sequence data rather than the allele identifiers. -In recent years, the concept of using cryptographic hashes of the allele sequence itself have gained traction in a variety of different allele calling software, such as [Chewbbaca](https://github.com/B-UMMI/chewBBACA), to provide decentralized allele identifiers. Hashing the sequence yields a determinist and fixed-size hash value which can be compared in the same manner as integers. There are numerous hash functions with different strengths and weaknesses but MD5 digests have broad adoption in the software community and are routinely used to provide some assurance that a transferred file has arrived intact. The choice of md5 hash provides 16^32, possible hashes. There is a theoretical chance of hash collisions, i.e., different sequences resulting in the same hash, but as the number of allele sequences for each gene in databases is relatively low, this should be an uncommon occurrence. Collisions in this case would result in profiles appearing more similar than they truly are at the sequence level. In addition, the chances of multiple occurrences of collisions within a profile would be infinitely small. 
+In recent years, the concept of using cryptographic hashes of the allele sequence itself has gained traction in various allele-calling software, such as [Chewbbaca](https://github.com/B-UMMI/chewBBACA), to provide decentralized allele identifiers. Hashing the sequence yields a determinist and fixed-size hash value, which can be compared in the same manner as integers. There are numerous hash functions with different strengths and weaknesses, but MD5 digests have been broadly adopted in the software community. They are routinely used to assure that a transferred file has arrived intact. The choice of md5 hash provides 16^32, possible hashes. There is a theoretical chance of hash collisions, i.e., different sequences resulting in the same hash. However, as the number of allele sequences for each gene in databases is relatively low, this should be uncommon. In this case, collisions would result in profiles appearing more similar than they are at the sequence level. In addition, the chances of multiple occurrences of collisions within a profile would be infinitely small. -The motivation for developing locidex is the need a common searching engine for various loci based typing applications such as: gene-by-gene (mlst, cgMLST, wgMLST, rmlst), in silico serotyping, gene-based phenotype predictions (amr, virulence, pathotype, toxin typing), marker-based typing (16S). The tool must provide custom criteria filtering by loci, and produce multiple formats for downstream applications. It must be compatible with an HSP environment and not encounter any locking issues where multiple processes may try to change the data at the same time. 
[THIS SECTION WILL NEED EDITING]The logic for allele calling is greatly simplified by leveraging existing annotations from tools such as [prodigal](https://github.com/hyattpd/Prodigal), [prokka](https://github.com/tseemann/prokka), [bakta](https://github.com/oschwengers/bakta) to delineate the boundaries of the sequences to be queried and hashed to produce allele identifiers. A common issue in matching applications is that ranges of identity and coverage for a match will vary by locus and so locidex builds into its database structure control over these attributes at a locus level allowing for high variability databases to be used without building custom logic downstream. This is particularly important when lengths of loci can exhibit considerable variability as is the case for genes of interest for typing applications. This provides greater flexibility for the designation of ideal thresholds for a given application. However, these values can be overridden using the report module filtering parameters as well as by modifying the values within the database. [END] +The motivation for developing locidex is the need for a common search engine for various loci-based typing applications such as gene-by-gene (MLST, cgMLST, wgMLST, rMLST), in silico serotyping, gene-based phenotype predictions (amr, virulence, pathotype, toxin typing), marker-based typing (16S). The tool must provide custom criteria filtering by loci and produce multiple formats for downstream applications. It must be compatible with an HSP environment and not encounter locking issues where multiple processes may try to change the data simultaneously. It should provide input sequence data flexibility to user which includes support for 1) existing sequence annotations 2) de novo annotations based on contig input 3) capable of extracting sequence regions of interest. 
The logic for allele calling is greatly simplified by leveraging existing annotations from tools such as [prodigal](https://github.com/hyattpd/Prodigal), [prokka](https://github.com/tseemann/prokka), [bakta](https://github.com/oschwengers/bakta) to delineate the boundaries of the sequences to be queried and hashed to produce allele identifiers. However, not all loci are protein coding, have inconsistent annotations, or are not a complete OFR, and so Locidex has built in support for extracting regions of interest from a query genome. A common issue in matching applications is that ranges of identity and coverage for a match will vary by locus. So, locidex builds control over these attributes at a locus level into its database structure, allowing for high variability databases to be used without custom logic being built downstream. This is particularly important when lengths of loci can exhibit considerable variability, as is the case for genes of interest in typing applications. This provides greater flexibility for the designation of ideal thresholds for a given application. However, these values can be overridden using the report module filtering parameters and by modifying the values within the database. -[Chewbbaca](https://github.com/B-UMMI/chewBBACA) is an excellent choice for an open source allele caller and provides many advanced features for developing, curating and using gene-by-gene schemes. It provides a great deat of additional information regarding partial gene sequences. For R&D applications, this functionality can be extremely useful. However, for some operational contexts, the design of [Chewbbaca](https://github.com/B-UMMI/chewBBACA) provides undesirable information and at present it has issues with multiple instances using the same database at once with novel allele detection enabled ([B-UMMI/chewBBACA#168](https://github.com/B-UMMI/chewBBACA/issues/168)). 
Locidex is meant to be optimized for routine operation level searching where it is useful to have default parameters that are set for the user to have reproducibility combined with flexibility to apply multiple filtering parameters on the sequence store after the fact. This allows exploring different thresholds without the need to recompute blast searches. In addition, there is often a desire to include additional information about a given locus such as different identifiers, functional properties, and phenotypic effects. The database format of locidex allows inclusion of any number of fields bundled into a search result object for users to describe their data conveniently during downstream analysis. This functionality allows for different use cases of data from a common data store. Locidex does not have the full features for a gene-by-gene software package like [Chewbbaca](https://github.com/B-UMMI/chewBBACA) but can be used to achieve similar results while being a more generic tool kit for blast searches, similar to [abricate](https://github.com/tseemann/abricate). +[Chewbbaca](https://github.com/B-UMMI/chewBBACA) is an excellent choice for an open-source allele caller and provides many advanced features for developing, curating and using gene-by-gene schemes. It provides a great deal of additional information regarding partial gene sequences. For R&D applications, this functionality can be extremely useful. However, for some operational contexts, the design of [Chewbbaca](https://github.com/B-UMMI/chewBBACA) provides undesirable information, and at present, it has issues with multiple instances using the same database at once with novel allele detection enabled ([B-UMMI/chewBBACA#168](https://github.com/B-UMMI/chewBBACA/issues/168)). Locidex is meant to be optimized for routine operation-level searching. 
It is helpful to set default parameters for the user to have reproducibility and flexibility when applying multiple filtering parameters on the sequence store after the fact. This allows exploring different thresholds without the need to recompute blast searches. In addition, there is often a desire to include additional information about a given locus, such as different identifiers, functional properties, and phenotypic effects. The database format of locidex allows the inclusion of any number of fields bundled into a search result object for users to describe their data conveniently during downstream analysis. This functionality allows for different data use cases from a common data store. Locidex does not have the full features for a gene-by-gene software package like [Chewbbaca](https://github.com/B-UMMI/chewBBACA) but can be used to achieve similar results while being a more generic tool kit for blast searches, similar to [abricate](https://github.com/tseemann/abricate). ## Citation From 5797705e9854e70abf8fc8ca06f0f28659e78226 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 21 Mar 2024 10:51:46 -0400 Subject: [PATCH 02/51] Word smithing introduction --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e0553e5..0c66f8a 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ Decentralized allele calling has become a pressing concern for public health lab In recent years, the concept of using cryptographic hashes of the allele sequence itself has gained traction in various allele-calling software, such as [Chewbbaca](https://github.com/B-UMMI/chewBBACA), to provide decentralized allele identifiers. Hashing the sequence yields a determinist and fixed-size hash value, which can be compared in the same manner as integers. There are numerous hash functions with different strengths and weaknesses, but MD5 digests have been broadly adopted in the software community. 
They are routinely used to assure that a transferred file has arrived intact. The choice of md5 hash provides 16^32, possible hashes. There is a theoretical chance of hash collisions, i.e., different sequences resulting in the same hash. However, as the number of allele sequences for each gene in databases is relatively low, this should be uncommon. In this case, collisions would result in profiles appearing more similar than they are at the sequence level. In addition, the chances of multiple occurrences of collisions within a profile would be infinitely small. -The motivation for developing locidex is the need for a common search engine for various loci-based typing applications such as gene-by-gene (MLST, cgMLST, wgMLST, rMLST), in silico serotyping, gene-based phenotype predictions (amr, virulence, pathotype, toxin typing), marker-based typing (16S). The tool must provide custom criteria filtering by loci and produce multiple formats for downstream applications. It must be compatible with an HSP environment and not encounter locking issues where multiple processes may try to change the data simultaneously. It should provide input sequence data flexibility to user which includes support for 1) existing sequence annotations 2) de novo annotations based on contig input 3) capable of extracting sequence regions of interest. The logic for allele calling is greatly simplified by leveraging existing annotations from tools such as [prodigal](https://github.com/hyattpd/Prodigal), [prokka](https://github.com/tseemann/prokka), [bakta](https://github.com/oschwengers/bakta) to delineate the boundaries of the sequences to be queried and hashed to produce allele identifiers. However, not all loci are protein coding, have inconsistent annotations, or are not a complete OFR, and so Locidex has built in support for extracting regions of interest from a query genome. A common issue in matching applications is that ranges of identity and coverage for a match will vary by locus. 
So, locidex builds control over these attributes at a locus level into its database structure, allowing for high variability databases to be used without custom logic being built downstream. This is particularly important when lengths of loci can exhibit considerable variability, as is the case for genes of interest in typing applications. This provides greater flexibility for the designation of ideal thresholds for a given application. However, these values can be overridden using the report module filtering parameters and by modifying the values within the database. +The motivation for developing locidex is the need for a common search engine for various loci-based typing applications such as gene-by-gene (MLST, cgMLST, wgMLST, rMLST), in silico serotyping, gene-based phenotype predictions (amr, virulence, pathotype, toxin typing), and marker-based typing (16S). The tool must provide custom criteria filtering by loci and produce multiple formats for downstream applications. It must be compatible with an HPC environment and not encounter locking issues where multiple processes may try to change the data simultaneously. It should give the user flexibility in input sequence data, including support for 1) existing sequence annotations, 2) de novo annotations based on contig input, and 3) extraction of sequence regions of interest. The logic for allele calling is greatly simplified by leveraging existing annotations from tools such as [prodigal](https://github.com/hyattpd/Prodigal), [prokka](https://github.com/tseemann/prokka), [bakta](https://github.com/oschwengers/bakta) to delineate the boundaries of the sequences to be queried and hashed to produce allele identifiers. However, some loci are not protein coding, have inconsistent annotations, or are not a complete ORF, so Locidex has built-in support for extracting regions of interest from a query genome. 
A common issue in matching applications is that ranges of identity and coverage for a match will vary by locus. So, locidex builds control over these attributes at a locus level into its database structure, allowing for high variability databases to be used without custom logic being built downstream. This is particularly important when lengths of loci can exhibit considerable variability, as is the case for genes of interest in typing applications. This provides greater flexibility for the designation of ideal thresholds for a given application. However, these values can be overridden using the report module filtering parameters and by modifying the values within the database. [Chewbbaca](https://github.com/B-UMMI/chewBBACA) is an excellent choice for an open-source allele caller and provides many advanced features for developing, curating and using gene-by-gene schemes. It provides a great deal of additional information regarding partial gene sequences. For R&D applications, this functionality can be extremely useful. However, for some operational contexts, the design of [Chewbbaca](https://github.com/B-UMMI/chewBBACA) provides undesirable information, and at present, it has issues with multiple instances using the same database at once with novel allele detection enabled ([B-UMMI/chewBBACA#168](https://github.com/B-UMMI/chewBBACA/issues/168)). Locidex is meant to be optimized for routine operation-level searching. It is helpful to set default parameters for the user to have reproducibility and flexibility when applying multiple filtering parameters on the sequence store after the fact. This allows exploring different thresholds without the need to recompute blast searches. In addition, there is often a desire to include additional information about a given locus, such as different identifiers, functional properties, and phenotypic effects. 
The database format of locidex allows the inclusion of any number of fields bundled into a search result object for users to describe their data conveniently during downstream analysis. This functionality allows for different data use cases from a common data store. Locidex does not have the full features for a gene-by-gene software package like [Chewbbaca](https://github.com/B-UMMI/chewBBACA) but can be used to achieve similar results while being a more generic tool kit for blast searches, similar to [abricate](https://github.com/tseemann/abricate). From 7f0124433798ed2e4f315642ddfab863f6d6b4c9 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 26 Mar 2024 15:35:21 -0400 Subject: [PATCH 03/51] corrected behaviour of extract for overlaping loci --- locidex/classes/extractor.py | 85 +++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 6 deletions(-) diff --git a/locidex/classes/extractor.py b/locidex/classes/extractor.py index 57f6876..cfa9cb1 100644 --- a/locidex/classes/extractor.py +++ b/locidex/classes/extractor.py @@ -4,7 +4,8 @@ class extractor: seqs = {} df = pd.DataFrame() - def __init__(self,df,seq_data,sseqid_col,queryid_col,qstart_col,qend_col,qlen_col,sstart_col,send_col,slen_col,sstrand_col,bitscore_col,overlap_thresh=1,extend_threshold_ratio = 0.2,filter_contig_breaks=True): + def __init__(self,df,seq_data,sseqid_col,queryid_col,qstart_col,qend_col,qlen_col,sstart_col,send_col,slen_col,sstrand_col,bitscore_col,overlap_thresh=100,extend_threshold_ratio = 0.2,filter_contig_breaks=True): + print('hi') self.filter_contig_breaks = filter_contig_breaks self.df = self.set_extraction_pos(df, sstart_col, send_col) self.is_complete(self.df,qstart_col,qend_col,qlen_col) @@ -18,10 +19,14 @@ def __init__(self,df,seq_data,sseqid_col,queryid_col,qstart_col,qend_col,qlen_co self.df[c] = self.df[c].apply(lambda x: x - 1) #self.df = self.fix_postioning(self.df) - sort_cols = [sseqid_col, queryid_col, sstart_col, send_col, bitscore_col] + sort_cols = 
['locus_name', sseqid_col, sstart_col, send_col, bitscore_col] ascending_cols = [True, True, True, True, False] + self.df = self.df.sort_values(sort_cols,ascending=ascending_cols).reset_index(drop=True) + self.df = self.recursive_filter_redundant_queries(self.df, 'locus_name', sseqid_col, bitscore_col, + sort_cols, ascending_cols, overlap_threshold=1) + #self.df = self.recursive_filter_overlap_records(self.df, sseqid_col, bitscore_col, sort_cols, ascending_cols, overlap_threshold=overlap_thresh) - self.df = self.extend(self.df,sseqid_col, queryid_col, qstart_col, qend_col, sstart_col,send_col,slen_col, qlen_col, bitscore_col, overlap_threshold=1) + self.df = self.extend(self.df,sseqid_col, queryid_col, qstart_col, qend_col, sstart_col,send_col,slen_col, qlen_col, bitscore_col, overlap_threshold=overlap_thresh) #self.df = self.recursive_filter_overlap_records(self.df, sseqid_col, bitscore_col, sort_cols, ascending_cols, # overlap_threshold=overlap_thresh) @@ -64,6 +69,72 @@ def set_extraction_pos(self,df,start_col,end_col): df.loc[idx, 'ext_end'] = end return df + def recursive_filter_redundant_queries(self,df, locus_col, sseqid_col, bitscore_col, sort_cols, ascending_cols, overlap_threshold=1): + size = len(df) + prev_size = 0 + while size != prev_size: + df = df.sort_values(sort_cols, + ascending=ascending_cols).reset_index(drop=True) + df = self.remove_redundant_queries(df, locus_col, sseqid_col, bitscore_col, overlap_threshold=overlap_threshold) + prev_size = size + size = len(df) + + return df.sort_values(sort_cols,ascending=ascending_cols).reset_index(drop=True) + + def remove_redundant_queries(self,df,locus_col,sseqid_col, bitscore_col, overlap_threshold=1): + seq_id_list = list(df[sseqid_col].unique()) + filter_df = [] + for seqid in seq_id_list: + subset = df[df[sseqid_col] == seqid] + prev_locus_id = '' + prev_contig_id = '' + prev_index = -1 + prev_contig_start = -1 + prev_contig_end = -1 + prev_score = 0 + filter_rows = [] + for idx, row in 
subset.iterrows(): + contig_id = row[sseqid_col] + contig_start = row['ext_start'] + contig_end = row['ext_end'] + score = float(row[bitscore_col]) + locus_id = row[locus_col] + + if prev_contig_id == '': + prev_index = idx + prev_contig_id = contig_id + prev_contig_start = contig_start + prev_contig_end = contig_end + prev_score = score + prev_locus_id = locus_id + continue + + if locus_id == prev_locus_id: + if (contig_start >= prev_contig_start and contig_start <= prev_contig_end) or ( + contig_end >= prev_contig_start and contig_end <= prev_contig_end): + overlap = abs(contig_start - prev_contig_end) + + if overlap > overlap_threshold: + if prev_score < score: + filter_rows.append(prev_index) + else: + filter_rows.append(idx) + + prev_index = idx + prev_contig_id = contig_id + prev_contig_start = contig_start + prev_contig_end = contig_end + prev_score = score + prev_locus_id = locus_id + + + valid_ids = list( set(subset.index) - set(filter_rows) ) + + filter_df.append(subset.filter(valid_ids, axis=0)) + + + return pd.concat(filter_df, ignore_index=True) + def fix_postioning(self,df): for idx, row in df.iterrows(): start = row['ext_start'] @@ -212,9 +283,10 @@ def extract_seq(self,loci_data,seq_data): return seqs def extend(self,df,seqid_col, queryid_col, qstart_col, qend_col, sstart_col,send_col,slen_col, qlen_col, bitscore_col, overlap_threshold=1): - sort_cols = [seqid_col,'ext_start', 'ext_end', bitscore_col] - ascending_cols = [True, True, True, False] - df = self.recursive_filter_overlap_records(df, seqid_col, bitscore_col, sort_cols, ascending_cols, overlap_threshold) + sort_cols = ['locus_name',seqid_col,'ext_start', 'ext_end', bitscore_col] + ascending_cols = [True, True, True, True, False] + + df = self.recursive_filter_overlap_records(df, 'locus_name', bitscore_col, sort_cols, ascending_cols, overlap_threshold) df = df.sort_values(sort_cols, ascending=ascending_cols).reset_index(drop=True) @@ -308,6 +380,7 @@ def extend(self,df,seqid_col, 
queryid_col, qstart_col, qend_col, sstart_col,send return df def group_by_locus(self,df,seqid_col,query_col,qlen_col,extend_threshold_ratio = 0.2): + print(self.df.columns) sort_cols = ['locus_name',query_col,'ext_start', 'ext_end'] ascending_cols = [True, True, True, True] df = df.sort_values(sort_cols, From cc557c9941b3b89a711f403520bc6bd136b5a963 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 26 Mar 2024 21:04:46 -0400 Subject: [PATCH 04/51] added new report mode --- locidex/report.py | 52 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/locidex/report.py b/locidex/report.py index bc49be8..5950833 100644 --- a/locidex/report.py +++ b/locidex/report.py @@ -22,13 +22,14 @@ class CustomFormatter(ArgumentDefaultsHelpFormatter, RawDescriptionHelpFormatter parser.add_argument('-i','--input', type=str, required=True,help='Input file to report') parser.add_argument('-o', '--outdir', type=str, required=True, help='Output file to put results') parser.add_argument('-n', '--name', type=str, required=False, help='Sample name to include default=filename') - parser.add_argument('-m', '--mode', type=str, required=False, help='Allele profile assignment [normal,conservative]',default='normal') + parser.add_argument('-m', '--mode', type=str, required=False, help='Allele profile assignment [normal,conservative,fuzzy]',default='normal') parser.add_argument('-p', '--prop', type=str, required=False, help='Metadata label to use for aggregation',default='locus_name') parser.add_argument('-a', '--max_ambig', type=int, required=False, help='Maximum number of ambiguous characters allowed in a sequence',default=0) parser.add_argument('-s', '--max_stop', type=int, required=False, help='Maximum number of internal stop codons allowed in a sequence',default=0) parser.add_argument('--report_format', type=str, required=False, help='Report format of parsed results [profile]',default='profile') - + parser.add_argument('-r', 
'--match_ident', type=float, required=False, + help='Report match allele if percent difference is less than this value',default=100) parser.add_argument('-V', '--version', action='version', version="%(prog)s " + __version__) parser.add_argument('-f', '--force', required=False, help='Overwrite existing directory', action='store_true') @@ -43,12 +44,13 @@ class seq_reporter: db_seq_info = {} failed_seqids = set() - def __init__(self,data_dict,method='nucleotide',mode='normal',label='locus_name',filters={},max_ambig=0,max_int_stop=0): + def __init__(self,data_dict,method='nucleotide',mode='normal',label='locus_name',filters={},max_ambig=0,max_int_stop=0,match_ident=0): self.max_ambig_count = max_ambig self.max_int_stop_count = max_int_stop self.label = label self.method = method self.mode = mode + self.match_ident = match_ident self.data_dict = data_dict self.db_seq_info = self.data_dict["db_seq_info"] self.query_seq_data = self.data_dict["query_data"]['query_seq_data'] @@ -142,7 +144,7 @@ def calc_query_best_hit(self): for qid in self.query_hits: best_hits[qid] = {} for dbtype in self.query_hits[qid]: - best_hits[dbtype] = [] + best_hits[qid][dbtype] = [] hit_ids = [] hit_bit = [] for hit in self.query_hits[qid][dbtype]: @@ -153,9 +155,10 @@ def calc_query_best_hit(self): max_bit = max(hit_bit) top_ids = [] for idx, value in enumerate(hit_bit): - if value == max_bit: + if value >= max_bit: top_ids.append(idx) - best_hits[dbtype] = top_ids + best_hits[qid][dbtype] = top_ids + return best_hits def get_hit_locinames(self): @@ -183,7 +186,8 @@ def get_loci_to_query_map(self,hit_names,dbtype): return loci_lookup def allele_assignment(self,dbtype): - query_best_hits = self.calc_query_best_hit() + self.query_best_hits = self.calc_query_best_hit() + hit_loci_names = self.get_hit_locinames() loci_lookup = self.get_loci_to_query_map(hit_loci_names,dbtype) @@ -217,11 +221,12 @@ def allele_assignment(self,dbtype): continue for qid in matches: - if not dbtype in 
query_best_hits[qid]: + if not dbtype in self.query_best_hits[qid]: continue - best_hits = query_best_hits[qid][dbtype] + best_hits = self.query_best_hits[qid][dbtype] best_hit_names = set() for l in best_hits: + l = str(l) hinfo = self.db_seq_info[l] best_hit_names.add(hinfo['locus_name']) if len(best_hit_names) == 1: @@ -233,6 +238,18 @@ def allele_assignment(self,dbtype): self.locus_profile = profile self.populate_profile() + + def get_matching_ref_seq_info(self,qid, dbtype): + for hit in self.query_hits[qid][dbtype]: + hit_id = str(hit['sseqid']) + pident = hit['pident'] + if pident < self.match_ident: + continue + hinfo = self.db_seq_info[hit_id] + hit_name = hinfo['locus_name'] + return hinfo + return {} + def populate_profile(self): for locus_name in self.profile: values = set() @@ -244,7 +261,16 @@ def populate_profile(self): key = "dna_hash" elif self.method == 'protein': key = "aa_hash" - allele_hashes.append(self.query_seq_data[seq_id][key]) + hash_value = self.query_seq_data[seq_id][key] + if self.mode == 'fuzzy': + ref_seq_hitinfo = self.get_matching_ref_seq_info(seq_id, self.method) + if len(ref_seq_hitinfo) > 0: + if self.method == 'nucleotide': + hash_value = ref_seq_hitinfo['dna_seq_hash'] + elif self.method == 'protein': + hash_value = ref_seq_hitinfo['aa_seq_hash'] + + allele_hashes.append(hash_value) num_alleles = len(allele_hashes) if num_alleles > 1 and self.mode == 'conservative': @@ -253,7 +279,10 @@ def populate_profile(self): allele_hashes = calc_md5(["".join([str(x) for x in sorted(allele_hashes)])]) elif num_alleles == 0: allele_hashes = ['-'] + elif self.mode == 'fuzzy': + allele_hashes = calc_md5(["".join([str(x) for x in sorted(allele_hashes)])]) self.profile[locus_name] = ",".join(list(set([str(x) for x in allele_hashes]))) + def extract_hit_data(self,dbtype): @@ -287,6 +316,7 @@ def run(): mode = cmd_args.mode max_ambig = cmd_args.max_ambig max_int_stop = cmd_args.max_stop + match_ident = cmd_args.match_ident if sample_name is None: 
@@ -310,7 +340,7 @@ def run(): if len(seq_store_dict) == 0: sys.exit() - allele_obj = seq_reporter(seq_store_dict, method='nucleotide', mode=mode, label=label, filters={},max_ambig=max_ambig,max_int_stop=max_int_stop) + allele_obj = seq_reporter(seq_store_dict, method='nucleotide', mode=mode, label=label, filters={},max_ambig=max_ambig,max_int_stop=max_int_stop,match_ident=match_ident) if report_format == 'profile': From 096cc72ac158b2adb6568a6b7dd93af129733135 Mon Sep 17 00:00:00 2001 From: Matthew Wells <76452933+mattheww95@users.noreply.github.com> Date: Thu, 4 Apr 2024 12:35:57 -0500 Subject: [PATCH 05/51] Update version.py version was behind, bumping again --- locidex/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/locidex/version.py b/locidex/version.py index c12f34c..10939f0 100644 --- a/locidex/version.py +++ b/locidex/version.py @@ -1 +1 @@ -__version__ = '0.1.1' \ No newline at end of file +__version__ = '0.1.2' From 47aae9a470ddc6e8762681a411a96f6212fe5f72 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 11 Apr 2024 13:23:16 -0400 Subject: [PATCH 06/51] corrected paralog handelling --- locidex/classes/extractor.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/locidex/classes/extractor.py b/locidex/classes/extractor.py index cfa9cb1..d1316e8 100644 --- a/locidex/classes/extractor.py +++ b/locidex/classes/extractor.py @@ -5,9 +5,9 @@ class extractor: seqs = {} df = pd.DataFrame() def __init__(self,df,seq_data,sseqid_col,queryid_col,qstart_col,qend_col,qlen_col,sstart_col,send_col,slen_col,sstrand_col,bitscore_col,overlap_thresh=100,extend_threshold_ratio = 0.2,filter_contig_breaks=True): - print('hi') self.filter_contig_breaks = filter_contig_breaks self.df = self.set_extraction_pos(df, sstart_col, send_col) + self.is_complete(self.df,qstart_col,qend_col,qlen_col) self.is_contig_boundary(self.df,'ext_start','ext_end',slen_col) if filter_contig_breaks: @@ -17,19 +17,14 @@ def 
__init__(self,df,seq_data,sseqid_col,queryid_col,qstart_col,qend_col,qlen_co pcols = [qstart_col,qend_col,sstart_col,send_col] for c in pcols: self.df[c] = self.df[c].apply(lambda x: x - 1) - #self.df = self.fix_postioning(self.df) - - sort_cols = ['locus_name', sseqid_col, sstart_col, send_col, bitscore_col] + sort_cols = [sseqid_col, 'locus_name', sstart_col, send_col, bitscore_col] ascending_cols = [True, True, True, True, False] self.df = self.df.sort_values(sort_cols,ascending=ascending_cols).reset_index(drop=True) self.df = self.recursive_filter_redundant_queries(self.df, 'locus_name', sseqid_col, bitscore_col, sort_cols, ascending_cols, overlap_threshold=1) - #self.df = self.recursive_filter_overlap_records(self.df, sseqid_col, bitscore_col, sort_cols, ascending_cols, overlap_threshold=overlap_thresh) - self.df = self.extend(self.df,sseqid_col, queryid_col, qstart_col, qend_col, sstart_col,send_col,slen_col, qlen_col, bitscore_col, overlap_threshold=overlap_thresh) - #self.df = self.recursive_filter_overlap_records(self.df, sseqid_col, bitscore_col, sort_cols, ascending_cols, - # overlap_threshold=overlap_thresh) + self.df = self.extend(self.df,sseqid_col, queryid_col, qstart_col, qend_col, sstart_col,send_col,slen_col, qlen_col, bitscore_col, overlap_threshold=overlap_thresh) self.df = self.set_extraction_pos(self.df, sstart_col, send_col) loci_ranges = self.group_by_locus(self.df,sseqid_col, queryid_col,qlen_col,extend_threshold_ratio) self.seqs = self.extract_seq(loci_ranges, seq_data) @@ -283,13 +278,11 @@ def extract_seq(self,loci_data,seq_data): return seqs def extend(self,df,seqid_col, queryid_col, qstart_col, qend_col, sstart_col,send_col,slen_col, qlen_col, bitscore_col, overlap_threshold=1): - sort_cols = ['locus_name',seqid_col,'ext_start', 'ext_end', bitscore_col] - ascending_cols = [True, True, True, True, False] - - df = self.recursive_filter_overlap_records(df, 'locus_name', bitscore_col, sort_cols, ascending_cols, overlap_threshold) - df = 
df.sort_values(sort_cols, - ascending=ascending_cols).reset_index(drop=True) - + #sort_cols = [seqid_col, 'locus_name','ext_start', 'ext_end', bitscore_col] + #ascending_cols = [True, True, True, True, False] + #df = self.recursive_filter_overlap_records(df, 'sseqid', bitscore_col, sort_cols, ascending_cols, overlap_threshold) + #df = df.sort_values(sort_cols, + # ascending=ascending_cols).reset_index(drop=True) queries = df[queryid_col].to_list() #Remove incomplete hits when complete ones are present @@ -303,7 +296,6 @@ def extend(self,df,seqid_col, queryid_col, qstart_col, qend_col, sstart_col,send else: filtered.append(subset) df = pd.concat(filtered, ignore_index=True) - trunc_records = df[df['is_complete'] == False] if len(trunc_records) == 0: return df @@ -377,10 +369,10 @@ def extend(self,df,seqid_col, queryid_col, qstart_col, qend_col, sstart_col,send df['is_extended'] = is_extended df['is_5p_extended'] = five_p_ext df['is_3p_extended'] = three_p_ext + return df def group_by_locus(self,df,seqid_col,query_col,qlen_col,extend_threshold_ratio = 0.2): - print(self.df.columns) sort_cols = ['locus_name',query_col,'ext_start', 'ext_end'] ascending_cols = [True, True, True, True] df = df.sort_values(sort_cols, @@ -464,7 +456,6 @@ def group_by_locus(self,df,seqid_col,query_col,qlen_col,extend_threshold_ratio = 'is_3prime_boundary':is_3prime_boundary }) - return loci From fab02729b44f434f02c2b4b2cc1e9dd4a100d861 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 11 Apr 2024 16:09:14 -0400 Subject: [PATCH 07/51] paralog work --- locidex/classes/extractor.py | 78 +++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/locidex/classes/extractor.py b/locidex/classes/extractor.py index d1316e8..0b673ef 100644 --- a/locidex/classes/extractor.py +++ b/locidex/classes/extractor.py @@ -205,6 +205,61 @@ def remove_redundant_hits(self,df,seqid_col, bitscore_col, overlap_threshold=1): return pd.concat(filter_df, ignore_index=True) + def 
remove_redundant_hits_dual_key(self,df,locus_col, seqid_col, bitscore_col, overlap_threshold=1): + seq_id_list = list(df[seqid_col].unique()) + filter_df = [] + df.to_csv("/home/jarobert/tmp.txt",sep="\t",header=True) + for seqid in seq_id_list: + subset = df[df[seqid_col] == seqid] + prev_contig_id = '' + prev_locus_id = '' + prev_index = -1 + prev_contig_start = -1 + prev_contig_end = -1 + prev_score = 0 + filter_rows = [] + for idx, row in subset.iterrows(): + contig_id = row[seqid_col] + contig_start = row['ext_start'] + contig_end = row['ext_end'] + score = float(row[bitscore_col]) + locus_id = row[locus_col] + + if prev_contig_id == '': + prev_index = idx + prev_contig_id = contig_id + prev_contig_start = contig_start + prev_contig_end = contig_end + prev_score = score + prev_locus_id = locus_id + continue + + if locus_id == prev_locus_id and contig_id == prev_contig_id: + if (contig_start >= prev_contig_start and contig_start <= prev_contig_end) or ( + contig_end >= prev_contig_start and contig_end <= prev_contig_end): + + overlap = abs(contig_start - prev_contig_end) + + if overlap > overlap_threshold: + if prev_score < score: + filter_rows.append(prev_index) + else: + filter_rows.append(idx) + + prev_index = idx + prev_contig_id = contig_id + prev_contig_start = contig_start + prev_contig_end = contig_end + prev_score = score + prev_locus_id = locus_id + + + valid_ids = list( set(subset.index) - set(filter_rows) ) + + filter_df.append(subset.filter(valid_ids, axis=0)) + + + return pd.concat(filter_df, ignore_index=True) def recursive_filter_overlap_records(self,df, seqid_col, bitscore_col, sort_cols, ascending_cols, overlap_threshold=1): size = len(df) @@ -215,7 +270,17 @@ def recursive_filter_overlap_records(self,df, seqid_col, bitscore_col, sort_cols df = self.remove_redundant_hits(df, seqid_col, bitscore_col, overlap_threshold=overlap_threshold) prev_size = size size = len(df) - + return 
df.sort_values(sort_cols,ascending=ascending_cols).reset_index(drop=True) + + def recursive_filter_overlap_records_dual_key(self,df, locus_col, seqid_col, bitscore_col, sort_cols, ascending_cols, overlap_threshold=1): + size = len(df) + prev_size = 0 + while size != prev_size: + df = df.sort_values(sort_cols, + ascending=ascending_cols).reset_index(drop=True) + df = self.remove_redundant_hits_dual_key(df, locus_col, seqid_col, bitscore_col, overlap_threshold=overlap_threshold) + prev_size = size + size = len(df) return df.sort_values(sort_cols,ascending=ascending_cols).reset_index(drop=True) def extract_seq(self,loci_data,seq_data): @@ -278,13 +343,14 @@ def extract_seq(self,loci_data,seq_data): return seqs def extend(self,df,seqid_col, queryid_col, qstart_col, qend_col, sstart_col,send_col,slen_col, qlen_col, bitscore_col, overlap_threshold=1): - #sort_cols = [seqid_col, 'locus_name','ext_start', 'ext_end', bitscore_col] - #ascending_cols = [True, True, True, True, False] - #df = self.recursive_filter_overlap_records(df, 'sseqid', bitscore_col, sort_cols, ascending_cols, overlap_threshold) - #df = df.sort_values(sort_cols, - # ascending=ascending_cols).reset_index(drop=True) + sort_cols = [seqid_col, 'locus_name','ext_start', 'ext_end', bitscore_col] + ascending_cols = [True, True, True, True, False] + df = df.sort_values(sort_cols, ascending=ascending_cols).reset_index(drop=True) + df = self.recursive_filter_overlap_records_dual_key(df, 'locus_name','sseqid', bitscore_col, sort_cols, ascending_cols, overlap_threshold) + df = df.sort_values(sort_cols, ascending=ascending_cols).reset_index(drop=True) queries = df[queryid_col].to_list() + #Remove incomplete hits when complete ones are present filtered = [] for query in queries: From 4d08bed33e26128017ff5e6444a72eef9a228217 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 11 Apr 2024 16:32:03 -0400 Subject: [PATCH 08/51] removed testing code --- locidex/classes/extractor.py | 1 - 1 file changed, 1 deletion(-) diff 
--git a/locidex/classes/extractor.py b/locidex/classes/extractor.py index 0b673ef..a9d96b6 100644 --- a/locidex/classes/extractor.py +++ b/locidex/classes/extractor.py @@ -208,7 +208,6 @@ def remove_redundant_hits(self,df,seqid_col, bitscore_col, overlap_threshold=1): def remove_redundant_hits_dual_key(self,df,locus_col, seqid_col, bitscore_col, overlap_threshold=1): seq_id_list = list(df[seqid_col].unique()) filter_df = [] - df.to_csv("/home/jarobert/tmp.txt",sep="\t",header=True) for seqid in seq_id_list: subset = df[df[seqid_col] == seqid] prev_contig_id = '' From 09773828dcfcdefbfbdaf59bd53e9a5e25ea67ca Mon Sep 17 00:00:00 2001 From: James Date: Fri, 12 Apr 2024 16:35:31 -0400 Subject: [PATCH 09/51] updated paralog handelling --- locidex/classes/extractor.py | 88 +++++++++++++++++++++++++++++++++--- locidex/extract.py | 13 +++++- locidex/report.py | 12 +++-- locidex/search.py | 2 +- 4 files changed, 104 insertions(+), 11 deletions(-) diff --git a/locidex/classes/extractor.py b/locidex/classes/extractor.py index a9d96b6..efd2669 100644 --- a/locidex/classes/extractor.py +++ b/locidex/classes/extractor.py @@ -17,8 +17,10 @@ def __init__(self,df,seq_data,sseqid_col,queryid_col,qstart_col,qend_col,qlen_co pcols = [qstart_col,qend_col,sstart_col,send_col] for c in pcols: self.df[c] = self.df[c].apply(lambda x: x - 1) - sort_cols = [sseqid_col, 'locus_name', sstart_col, send_col, bitscore_col] - ascending_cols = [True, True, True, True, False] + + self.df = self.get_best_hit_query(self.df) + sort_cols = [sseqid_col, 'locus_name', sstart_col, bitscore_col, send_col] + ascending_cols = [True, True, True, False, False] self.df = self.df.sort_values(sort_cols,ascending=ascending_cols).reset_index(drop=True) self.df = self.recursive_filter_redundant_queries(self.df, 'locus_name', sseqid_col, bitscore_col, sort_cols, ascending_cols, overlap_threshold=1) @@ -64,6 +66,55 @@ def set_extraction_pos(self,df,start_col,end_col): df.loc[idx, 'ext_end'] = end return df + def 
get_best_hit_query(self,df): + sort_cols = ['sseqid', 'locus_name', 'bitscore','qlen','qcovhsp','ext_start','ext_end'] + ascending_cols = [True, True, False, False, False,True, False] + df = df.sort_values(sort_cols, + ascending=ascending_cols).reset_index(drop=True) + loci = list(df['locus_name'].unique()) + filter_df = [] + for locus_name in loci: + subset = df[df['locus_name'] == locus_name] + if len(subset) == 1: + filter_df.append(subset) + continue + + prev_index = 0 + prev_contig_id = '' + prev_contig_start = -1 + prev_contig_end = -1 + prev_score = 0 + filter_rows = [] + for idx, row in subset.iterrows(): + contig_id = row['sseqid'] + contig_start = row['ext_start'] + contig_end = row['ext_end'] + score = float(row['bitscore']) + + if contig_id == prev_contig_id: + if (contig_start >= prev_contig_start and contig_start <= prev_contig_end) or ( + contig_end >= prev_contig_start and contig_end <= prev_contig_end): + overlap = abs(contig_start - prev_contig_end) + + if overlap > 1: + if prev_score < score: + filter_rows.append(prev_index) + else: + filter_rows.append(idx) + continue + + + prev_index = idx + prev_contig_id = contig_id + prev_contig_start = contig_start + prev_contig_end = contig_end + prev_score = score + valid_ids = list( set(subset.index) - set(filter_rows) ) + filter_df.append(subset.filter(valid_ids, axis=0)) + + return pd.concat(filter_df, ignore_index=True) + + def recursive_filter_redundant_queries(self,df, locus_col, sseqid_col, bitscore_col, sort_cols, ascending_cols, overlap_threshold=1): size = len(df) prev_size = 0 @@ -294,6 +345,13 @@ def extract_seq(self,loci_data,seq_data): is_reverse = row['reverse'] is_complement = row['complement'] is_extended = row['is_extended'] + is_5p_extended = row['is_5p_extended'] + is_3p_extended = row['is_3p_extended'] + if is_reverse and is_3p_extended: + start+=1 + if is_complement and is_3p_extended: + end-=1 + is_complete = row['is_complete'] @@ -326,15 +384,23 @@ def 
extract_seq(self,loci_data,seq_data): cds_valid = False - seqs.append({'seqid':seqid, 'id':str(id), - 'locus_name':locus_name,'query_id':query_id, + seqs.append({ 'id':str(id),'seqid':seqid, + 'locus_name':locus_name,'query_id':query_id,'qlen':row['qlen'], 'start':start, 'end':end, + 'sub_start':row['sub_start'], + 'sub_ent':row['sub_end'], + 'ident':row['ident'], + 'qcovs':row['qcovs'], + 'bitscore':row['bitscore'], 'reverse':is_reverse, 'complement':is_complement, 'is_complete':is_complete, 'is_trunc':is_trunc, 'fivep_trunc':fivep_trunc, 'threep_trunc':threep_trunc, + 'is_extended':is_extended, + 'is_5p_extended':is_5p_extended, + 'is_3p_extended':is_3p_extended, 'seq':seq,'start_codon':start_codon,'stop_codon':stop_codon, 'is_stop_valid':is_stop_valid,'is_start_valid':is_start_valid, 'is_cds_valid':cds_valid}) @@ -342,8 +408,8 @@ def extract_seq(self,loci_data,seq_data): return seqs def extend(self,df,seqid_col, queryid_col, qstart_col, qend_col, sstart_col,send_col,slen_col, qlen_col, bitscore_col, overlap_threshold=1): - sort_cols = [seqid_col, 'locus_name','ext_start', 'ext_end', bitscore_col] - ascending_cols = [True, True, True, True, False] + sort_cols = [seqid_col, 'locus_name','ext_start', bitscore_col, 'ext_end', ] + ascending_cols = [True, True, True, False, False] df = df.sort_values(sort_cols, ascending=ascending_cols).reset_index(drop=True) df = self.recursive_filter_overlap_records_dual_key(df, 'locus_name','sseqid', bitscore_col, sort_cols, ascending_cols, overlap_threshold) df = df.sort_values(sort_cols, ascending=ascending_cols).reset_index(drop=True) @@ -466,6 +532,10 @@ def group_by_locus(self,df,seqid_col,query_col,qlen_col,extend_threshold_ratio = for idx, row in df.iterrows(): query_id = row[query_col] locus_name = row['locus_name'] + ident = row['pident'] + cov = row['qcovs'] + sub_start = row['sstart'] + sub_end = row['send'] start = row['ext_start'] end = row['ext_end'] qlen = row[qlen_col] @@ -509,8 +579,14 @@ def 
group_by_locus(self,df,seqid_col,query_col,qlen_col,extend_threshold_ratio = loci[locus_name].append({ 'seqid':seqid, 'query_id':query_id, + 'qlen':row['qlen'], + 'bitscore':row['bitscore'], 'start':start, 'end':end, + 'sub_start':sub_start, + 'sub_end':sub_end, + 'ident':ident, + 'qcovs':cov, 'reverse':is_reverse, 'complement':is_complement, 'is_complete':is_complete, diff --git a/locidex/extract.py b/locidex/extract.py index 4066267..f6124ea 100644 --- a/locidex/extract.py +++ b/locidex/extract.py @@ -65,6 +65,14 @@ class CustomFormatter(ArgumentDefaultsHelpFormatter, RawDescriptionHelpFormatter return parser.parse_args() +def write_seq_info(seq_data,out_file): + data = {} + for idx,entry in enumerate(seq_data): + data[idx] = entry + pd.DataFrame.from_dict(data,orient='index').to_csv(out_file,sep="\t",header=True,index=False) + + + def run_extract(config): # Input Parameters input_fasta = config['in_fasta'] @@ -155,6 +163,7 @@ def run_extract(config): 'evalue': min_evalue, 'max_target_seqs': max_target_seqs, 'num_threads': n_threads, + 'word_size':11 } nt_db = "{}.fasta".format(blast_database_paths['nucleotide']) hit_file = os.path.join(blast_dir_base, "hsps.txt") @@ -170,7 +179,8 @@ def run_extract(config): filter_options = { 'evalue': {'min': None, 'max': min_evalue, 'include': None}, 'pident': {'min': min_dna_ident, 'max': None, 'include': None}, - 'qcovs': {'min': min_dna_match_cov, 'max': None, 'include': None} + 'qcovs': {'min': min_dna_match_cov, 'max': None, 'include': None}, + 'qcovhsp': {'min': min_dna_match_cov, 'max': None, 'include': None}, } hit_df = parse_blast(hit_file, BLAST_TABLE_COLS, filter_options).df @@ -193,6 +203,7 @@ def run_extract(config): qlen_col='qlen',sstart_col='sstart',send_col='send',slen_col='slen',sstrand_col='sstrand', bitscore_col='bitscore',filter_contig_breaks=filt_trunc) + write_seq_info(exobj.seqs,os.path.join(outdir,'seq_data.txt')) 
exobj.df.to_csv(os.path.join(outdir,'filtered.hsps.txt'),header=True,sep="\t",index=False) nt_db_seq_obj = seq_intake(nt_db, 'fasta', 'source', translation_table, perform_annotation=False, skip_trans=True) diff --git a/locidex/report.py b/locidex/report.py index 5950833..35c5850 100644 --- a/locidex/report.py +++ b/locidex/report.py @@ -7,7 +7,7 @@ import pandas as pd -from locidex.constants import SEARCH_RUN_DATA +from locidex.constants import SEARCH_RUN_DATA, START_CODONS, STOP_CODONS from locidex.utils import calc_md5 from locidex.version import __version__ @@ -60,6 +60,8 @@ def __init__(self,data_dict,method='nucleotide',mode='normal',label='locus_name' self.build_profile() + + def filter_queries(self): failed_seqids = set() for seq_id in self.query_seq_data: @@ -68,10 +70,14 @@ def filter_queries(self): stop_count = int(self.query_seq_data[seq_id]['count_internal_stop']) else: stop_count = 0 - if ambig_count > self.max_ambig_count or stop_count > self.max_int_stop_count: failed_seqids.add(seq_id) - + if self.mode == 'conservative': + start_codon = self.query_seq_data[seq_id]["start_codon"] + stop_codon = self.query_seq_data[seq_id]["stop_codon"] + if start_codon not in START_CODONS or stop_codon not in STOP_CODONS: + failed_seqids.add(seq_id) + self.failed_seqids = failed_seqids def build_profile(self): diff --git a/locidex/search.py b/locidex/search.py index 7b0c33e..bfb6964 100644 --- a/locidex/search.py +++ b/locidex/search.py @@ -231,7 +231,7 @@ def run_search(config): run_data['result_file'] = os.path.join(outdir, "seq_store.json") del (filtered_df) - with open(os.path.join(outdir, run_data['result_file']), "w") as fh: + with open(run_data['result_file'], "w") as fh: fh.write(json.dumps(store_obj.record, indent=4)) run_data['analysis_end_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") From 5b8b71bb449c04e8e422d7058d16ab188f819d6a Mon Sep 17 00:00:00 2001 From: James Date: Mon, 15 Apr 2024 09:52:39 -0400 Subject: [PATCH 10/51] added int stop filter 
--- locidex/report.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/locidex/report.py b/locidex/report.py index 35c5850..5c43053 100644 --- a/locidex/report.py +++ b/locidex/report.py @@ -73,9 +73,10 @@ def filter_queries(self): if ambig_count > self.max_ambig_count or stop_count > self.max_int_stop_count: failed_seqids.add(seq_id) if self.mode == 'conservative': + count_internal_stop = self.query_seq_data[seq_id]['count_internal_stop'] start_codon = self.query_seq_data[seq_id]["start_codon"] stop_codon = self.query_seq_data[seq_id]["stop_codon"] - if start_codon not in START_CODONS or stop_codon not in STOP_CODONS: + if start_codon not in START_CODONS or stop_codon not in STOP_CODONS or count_internal_stop > 0: failed_seqids.add(seq_id) self.failed_seqids = failed_seqids From 2e2621a427ae0fae354551d3ceceb15c688d9d06 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 15 Apr 2024 10:25:29 -0400 Subject: [PATCH 11/51] added int stop filter --- locidex/report.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/locidex/report.py b/locidex/report.py index 5c43053..a2b94c1 100644 --- a/locidex/report.py +++ b/locidex/report.py @@ -198,24 +198,26 @@ def allele_assignment(self,dbtype): hit_loci_names = self.get_hit_locinames() loci_lookup = self.get_loci_to_query_map(hit_loci_names,dbtype) + for locus in loci_lookup: loci_lookup[locus] = list(set(loci_lookup[locus]) - self.failed_seqids) - self.populate_profile() + loci_names_to_assign = set(self.profile.keys()) assigned_loci = set() #Fix the values of any loci where there is a single matching query or no matching queries for locus_name in self.profile: query_hashes = self.profile[locus_name].split(',') - num_queries = len(query_hashes) + num_queries = len(loci_lookup[locus]) if num_queries == 1 and query_hashes[0] != '-': assigned_loci.add(locus_name ) elif locus_name not in loci_lookup or len(loci_lookup[locus_name]) == 0: assigned_loci.add(locus_name) + 
self.profile[locus_name] = '-' loci_names_to_assign = loci_names_to_assign - assigned_loci @@ -243,7 +245,7 @@ def allele_assignment(self,dbtype): del(profile[locus_name][qid]) self.locus_profile = profile - self.populate_profile() + def get_matching_ref_seq_info(self,qid, dbtype): @@ -356,8 +358,6 @@ def run(): profile = {sample_name: allele_obj.profile} with open(os.path.join(outdir,"profile.json"),"w") as out: json.dump(profile,out,indent=4) - - if report_format == 'profile': allele_obj.extract_hit_data('nucleotide').to_csv(os.path.join(outdir,"nucleotide.hits.txt"),header=True,sep="\t", index=False) allele_obj.extract_hit_data('protein').to_csv(os.path.join(outdir, "protein.hits.txt"), header=True, sep="\t", index=False) From 8e2040d0c1e4cbeba3e3ec55f959dac09941ea03 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 17 Apr 2024 11:07:54 -0400 Subject: [PATCH 12/51] updated report description --- README.md | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0c66f8a..f5bb741 100644 --- a/README.md +++ b/README.md @@ -184,7 +184,23 @@ Produce loci hash profiles in multiple formats (json, tsv, parquet) - Filter results based on user criteria - Multi-copy loci handling -**Optional:** (Not required for MVP) Produce concatenated fasta sequences based on allele profiles +QA Modes: + +Conservative: +A locus is reported with an allele call only if all of the following are true. (Only works with protein coding schemes) +1) Match identity >= threshold +2) Match coverage >= threshold +3) Valid start codon present +4) Valid stop codon present +5) No internal stop codons +6) Only a single hit meets the criteria above + +Normal +A locus is reported with an allele call only if all of the following are true. 
+1) Match identity >= threshold +2) Match coverage >= threshold +3) Multiple matches to a single locus are hashed to produce an allele call which is the hash of the (n) match hashes found + #### Input A Sequence store (`seq_store.json`) object produced by the 'search' function. @@ -193,7 +209,14 @@ A Sequence store (`seq_store.json`) object produced by the 'search' function. #### Output -[INSERT REPORT OUTPUT] +``` +{out folder name} +├── profile.json +└── results.json +``` + + + ### Merge From c48fc957740645287b2db28585c2ed9feb8aa2c5 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 22 Apr 2024 11:32:25 -0400 Subject: [PATCH 13/51] updated report format to include db info and sequence data --- locidex/report.py | 86 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 67 insertions(+), 19 deletions(-) diff --git a/locidex/report.py b/locidex/report.py index ea10002..d85754f 100644 --- a/locidex/report.py +++ b/locidex/report.py @@ -6,7 +6,7 @@ from datetime import datetime import pandas as pd - +from locidex.classes.seq_intake import seq_intake from locidex.constants import SEARCH_RUN_DATA, START_CODONS, STOP_CODONS from locidex.utils import calc_md5 from locidex.version import __version__ @@ -16,18 +16,21 @@ def add_args(parser=None): if parser is None: parser = ArgumentParser( - description="Locidex: Advanced searching and filtering of sequence databases using query sequences") - parser.add_argument('-i','--input', type=str, required=True,help='Input file to report') + description="Locidex Report: Generate a report from search results") + parser.add_argument('-i','--input', type=str, required=True,help='Input seq_store file to report') + parser.add_argument('-f','--fasta', type=str, required=False,help='Optional: Query fasta file used to generate search results') parser.add_argument('-o', '--outdir', type=str, required=True, help='Output file to put results') parser.add_argument('-n', '--name', type=str, required=False, help='Sample name to include 
default=filename') parser.add_argument('-m', '--mode', type=str, required=False, help='Allele profile assignment [normal,conservative,fuzzy]',default='normal') parser.add_argument('-p', '--prop', type=str, required=False, help='Metadata label to use for aggregation',default='locus_name') parser.add_argument('-a', '--max_ambig', type=int, required=False, help='Maximum number of ambiguous characters allowed in a sequence',default=0) parser.add_argument('-s', '--max_stop', type=int, required=False, help='Maximum number of internal stop codons allowed in a sequence',default=0) - parser.add_argument('--report_format', type=str, required=False, - help='Report format of parsed results [profile]',default='profile') parser.add_argument('-r', '--match_ident', type=float, required=False, - help='Report match allele if percent difference is less than this value',default=100) + help='Report match allele if percent difference is >= this value',default=100) + parser.add_argument('-r', '--match_cov', type=float, required=False, + help='Report match allele if percent coverage is >+ this value',default=100) + parser.add_argument('--translation_table', type=int, required=False, + help='output directory', default=11) parser.add_argument('-V', '--version', action='version', version="%(prog)s " + __version__) parser.add_argument('-f', '--force', required=False, help='Overwrite existing directory', action='store_true') @@ -321,21 +324,20 @@ def run(cmd_args=None): input_file = cmd_args.input outdir = cmd_args.outdir label = cmd_args.prop - report_format = cmd_args.report_format sample_name = cmd_args.name force = cmd_args.force mode = cmd_args.mode + fasta_file = cmd_args.fasta max_ambig = cmd_args.max_ambig max_int_stop = cmd_args.max_stop match_ident = cmd_args.match_ident + match_cov = cmd_args.match_cov + translation_table = cmd_args.translation_table - if sample_name is None: - sample_name = '.'.join(os.path.basename(input_file).split('.')[:-1]) - run_data = SEARCH_RUN_DATA 
run_data['analysis_start_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") - run_data['parameters'] = vars(cmd_args) + run_data['parameters'] = analysis_parameters if os.path.isdir(outdir) and not force: print(f'Error {outdir} exists, if you would like to overwrite, then specify --force') @@ -351,17 +353,63 @@ def run(cmd_args=None): if len(seq_store_dict) == 0: sys.exit() + if sample_name is None: + sample_name = seq_store_dict["query_data"]["sample_name"] + + #validate the ids + seq_data = {} + if fasta_file is not None: + seq_info = seq_store_dict["query_data"]["query_seq_data"] + seq_obj = seq_intake(fasta_file, format, 'CDS', translation_table, perform_annotation=False) + if len(seq_info) != len(seq_obj.seq_data): + print(f'Error the supplied fasta file: {fasta_file} ({len(seq_obj.seq_data)}) seq_store file: {input_file} ({len(seq_info)}) \ + do not have the same number of sequences. These files must be matched') + sys.exit() + + for id in seq_info: + if id not in seq_obj.seq_data: + print(f'Error {id} key from seq_store not found in fasta file') + sys.exit() + pid_1 = seq_info[id]["seq_id"] + pid_2 = seq_obj.seq_data[id]["seq_id"] + if pid_1 != pid_2: + print(f'Error seq_store key for {id}: {pid_1} mismatched to input fasta {id}: {pid_2}. 
These files must be matched') + sys.exit() + seq_data = seq_obj.seq_data + allele_obj = seq_reporter(seq_store_dict, method='nucleotide', mode=mode, label=label, filters={},max_ambig=max_ambig,max_int_stop=max_int_stop,match_ident=match_ident) - if report_format == 'profile': - allele_obj.filter_queries() - allele_obj.allele_assignment('nucleotide') - profile = {sample_name: allele_obj.profile} - with open(os.path.join(outdir,"profile.json"),"w") as out: - json.dump(profile,out,indent=4) - allele_obj.extract_hit_data('nucleotide').to_csv(os.path.join(outdir,"nucleotide.hits.txt"),header=True,sep="\t", index=False) - allele_obj.extract_hit_data('protein').to_csv(os.path.join(outdir, "protein.hits.txt"), header=True, sep="\t", index=False) + + allele_obj.filter_queries() + allele_obj.allele_assignment('nucleotide') + allele_obj.extract_hit_data('nucleotide').to_csv(os.path.join(outdir,"nucleotide.hits.txt"),header=True,sep="\t", index=False) + allele_obj.extract_hit_data('protein').to_csv(os.path.join(outdir, "protein.hits.txt"), header=True, sep="\t", index=False) + + + profile = { + "db_info":seq_store_dict["db_info"], + 'parameters':{ + 'mode':mode, + 'min_match_ident':match_ident, + 'min_match_cov':match_cov, + 'max_ambiguous':max_ambig, + 'max_internal_stops':max_int_stop + }, + 'data':{ + 'sample_name':sample_name, + 'profile':{sample_name: allele_obj.profile}, + 'seq_data':seq_data + } + } + with open(os.path.join(outdir,"report.json"),"w") as out: + json.dump(profile,out,indent=4) + + + + run_data['analysis_end_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") + with open(os.path.join(outdir,"run.json"),'w' ) as fh: + fh.write(json.dumps(run_data, indent=4)) From 7f85c6915b7e668a6e3143322d7709397b992470 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 22 Apr 2024 11:43:40 -0400 Subject: [PATCH 14/51] updated report to use the same template as search module --- locidex/report.py | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 
insertions(+), 3 deletions(-) diff --git a/locidex/report.py b/locidex/report.py index d85754f..c6af6b4 100644 --- a/locidex/report.py +++ b/locidex/report.py @@ -312,7 +312,7 @@ def extract_hit_data(self,dbtype): return pd.DataFrame.from_dict(data) -def run(cmd_args=None): +def run_report(cmd_args=None): if cmd_args is None: parser = add_args() @@ -376,7 +376,7 @@ def run(cmd_args=None): print(f'Error seq_store key for {id}: {pid_1} mismatched to input fasta {id}: {pid_2}. These files must be matched') sys.exit() seq_data = seq_obj.seq_data - + allele_obj = seq_reporter(seq_store_dict, method='nucleotide', mode=mode, label=label, filters={},max_ambig=max_ambig,max_int_stop=max_int_stop,match_ident=match_ident) @@ -402,16 +402,52 @@ def run(cmd_args=None): 'seq_data':seq_data } } + + if len(profile['data']['seq_data']) > 0: + # add locus information to seq_data + look_up = {} + for locus_name in profile['data']['profile']: + h = profile['data']['profile'][locus_name] + if h not in look_up: + look_up[h] = [] + look_up[h].append(locus_name) + + for seq_id in profile['data']['seq_data']: + h = profile['data']['seq_data'][seq_id]['dna_hash'] + if h in look_up: + profile['data']['seq_data'][seq_id]['locus_name'] = ",".join([str(x) for x in look_up[h]]) + else: + profile['data']['seq_data'][seq_id]['locus_name'] = '' + + with open(os.path.join(outdir,"report.json"),"w") as out: json.dump(profile,out,indent=4) - run_data['analysis_end_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") with open(os.path.join(outdir,"run.json"),'w' ) as fh: fh.write(json.dumps(run_data, indent=4)) +def run(cmd_args=None): + #cmd_args = parse_args() + if cmd_args is None: + parser = add_args() + cmd_args = parser.parse_args() + analysis_parameters = vars(cmd_args) + config_file = cmd_args.config + + config = {} + if config_file is not None: + with open(config_file) as fh: + config = json.loads(fh.read()) + + for p in analysis_parameters: + if not p in config: + config[p] = 
analysis_parameters[p] + + run_report(config) + # call main function if __name__ == '__main__': From 2f346e7e57eca0d5bb14069aad1a2efa47af36ad Mon Sep 17 00:00:00 2001 From: James Date: Mon, 22 Apr 2024 11:46:00 -0400 Subject: [PATCH 15/51] updated report to use the same template as search module --- locidex/report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/locidex/report.py b/locidex/report.py index c6af6b4..5b00b90 100644 --- a/locidex/report.py +++ b/locidex/report.py @@ -423,7 +423,7 @@ def run_report(cmd_args=None): with open(os.path.join(outdir,"report.json"),"w") as out: json.dump(profile,out,indent=4) - + run_data['result_file'] = os.path.join(outdir,"report.json") run_data['analysis_end_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") with open(os.path.join(outdir,"run.json"),'w' ) as fh: fh.write(json.dumps(run_data, indent=4)) From a9b05143a6e6d8b4f26b5b4f1151191d9c1bf9fd Mon Sep 17 00:00:00 2001 From: James Date: Mon, 22 Apr 2024 12:11:55 -0400 Subject: [PATCH 16/51] added in seq_data --- locidex/report.py | 57 +++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/locidex/report.py b/locidex/report.py index 5b00b90..c4acafe 100644 --- a/locidex/report.py +++ b/locidex/report.py @@ -18,7 +18,8 @@ def add_args(parser=None): parser = ArgumentParser( description="Locidex Report: Generate a report from search results") parser.add_argument('-i','--input', type=str, required=True,help='Input seq_store file to report') - parser.add_argument('-f','--fasta', type=str, required=False,help='Optional: Query fasta file used to generate search results') + parser.add_argument('--fasta', type=str, required=False,help='Optional: Query fasta file used to generate search results') + parser.add_argument('-c', '--config', type=str, required=False, help='Locidex parameter config file (json)') parser.add_argument('-o', '--outdir', type=str, required=True, help='Output file to put results') 
parser.add_argument('-n', '--name', type=str, required=False, help='Sample name to include default=filename') parser.add_argument('-m', '--mode', type=str, required=False, help='Allele profile assignment [normal,conservative,fuzzy]',default='normal') @@ -27,7 +28,7 @@ def add_args(parser=None): parser.add_argument('-s', '--max_stop', type=int, required=False, help='Maximum number of internal stop codons allowed in a sequence',default=0) parser.add_argument('-r', '--match_ident', type=float, required=False, help='Report match allele if percent difference is >= this value',default=100) - parser.add_argument('-r', '--match_cov', type=float, required=False, + parser.add_argument('-l','--match_cov', type=float, required=False, help='Report match allele if percent coverage is >+ this value',default=100) parser.add_argument('--translation_table', type=int, required=False, help='output directory', default=11) @@ -312,27 +313,23 @@ def extract_hit_data(self,dbtype): return pd.DataFrame.from_dict(data) -def run_report(cmd_args=None): +def run_report(config): - if cmd_args is None: - parser = add_args() - cmd_args = parser.parse_args() - - analysis_parameters = vars(cmd_args) + analysis_parameters = config #Input Parameters - input_file = cmd_args.input - outdir = cmd_args.outdir - label = cmd_args.prop - sample_name = cmd_args.name - force = cmd_args.force - mode = cmd_args.mode - fasta_file = cmd_args.fasta - max_ambig = cmd_args.max_ambig - max_int_stop = cmd_args.max_stop - match_ident = cmd_args.match_ident - match_cov = cmd_args.match_cov - translation_table = cmd_args.translation_table + input_file = config['input'] + outdir = config['outdir'] + label = config['prop'] + sample_name = config['name'] + force = config['force'] + mode = config['mode'] + fasta_file = config['fasta'] + max_ambig = config['max_ambig'] + max_int_stop = config['max_stop'] + match_ident = config['match_ident'] + match_cov = config['match_cov'] + translation_table = config['translation_table'] 
run_data = SEARCH_RUN_DATA @@ -360,22 +357,23 @@ def run_report(cmd_args=None): seq_data = {} if fasta_file is not None: seq_info = seq_store_dict["query_data"]["query_seq_data"] - seq_obj = seq_intake(fasta_file, format, 'CDS', translation_table, perform_annotation=False) + seq_obj = seq_intake(fasta_file, 'fasta', 'CDS', translation_table, perform_annotation=False) if len(seq_info) != len(seq_obj.seq_data): print(f'Error the supplied fasta file: {fasta_file} ({len(seq_obj.seq_data)}) seq_store file: {input_file} ({len(seq_info)}) \ do not have the same number of sequences. These files must be matched') sys.exit() - - for id in seq_info: - if id not in seq_obj.seq_data: - print(f'Error {id} key from seq_store not found in fasta file') + + for i in range(0,len(seq_obj.seq_data)): + id = str(i) + if id not in seq_info: + print(f'Error {id} key from fasta file not in seq_store') sys.exit() pid_1 = seq_info[id]["seq_id"] - pid_2 = seq_obj.seq_data[id]["seq_id"] + pid_2 = seq_obj.seq_data[i]["seq_id"] if pid_1 != pid_2: print(f'Error seq_store key for {id}: {pid_1} mismatched to input fasta {id}: {pid_2}. 
These files must be matched') sys.exit() - seq_data = seq_obj.seq_data + seq_data[id] = seq_obj.seq_data[i] allele_obj = seq_reporter(seq_store_dict, method='nucleotide', mode=mode, label=label, filters={},max_ambig=max_ambig,max_int_stop=max_int_stop,match_ident=match_ident) @@ -406,8 +404,9 @@ def run_report(cmd_args=None): if len(profile['data']['seq_data']) > 0: # add locus information to seq_data look_up = {} - for locus_name in profile['data']['profile']: - h = profile['data']['profile'][locus_name] + for locus_name in profile['data']['profile'][sample_name]: + h = profile['data']['profile'][sample_name][locus_name] + print(h) if h not in look_up: look_up[h] = [] look_up[h].append(locus_name) From 38875fb62b37792d964f2df684c5ab36aae4bb60 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 22 Apr 2024 12:55:20 -0400 Subject: [PATCH 17/51] merge updated template and connection to new report format --- locidex/merge.py | 113 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 97 insertions(+), 16 deletions(-) diff --git a/locidex/merge.py b/locidex/merge.py index 882906f..b1cb87b 100644 --- a/locidex/merge.py +++ b/locidex/merge.py @@ -7,20 +7,23 @@ from datetime import datetime from functools import partial from mimetypes import guess_type - +from multiprocessing import Pool, cpu_count import pandas as pd - +from locidex.classes.aligner import align, parse_align from locidex.version import __version__ def add_args(parser=None): if parser is None: parser = ArgumentParser( - description="Locidex merge: Concatonate set of input profile.json files into a tsv table") - + description="Locidex merge: Concatonate set of input profile.json files into a tsv table or aligned fasta") parser.add_argument('-i','--input', type=str, required=True,help='Input file to report', action='append', nargs='+') parser.add_argument('-o', '--outdir', type=str, required=True, help='Output file to put results') + parser.add_argument('--n_threads','-t', type=int, required=False, + 
help='CPU Threads to use', default=1) parser.add_argument('-V', '--version', action='version', version="%(prog)s " + __version__) + parser.add_argument('-a', '--align', required=False, help='Perform alignment with individual loci to produce a concatenated alignment', + action='store_true') parser.add_argument('-f', '--force', required=False, help='Overwrite existing directory', action='store_true') return parser @@ -50,26 +53,60 @@ def read_file_list(file_list): _open = partial(gzip.open, mode='rt') if encoding == 'gzip' else open with _open(f) as fh: data = json.load(fh) - records = records | data + records[data['data']['sample_name']] = data return records +def extract_profiles(records): + profile = {} + for id in records: + for sample_name in records[id]['data']['profile']: + profile[sample_name] = records[id]['data']['profile'][sample_name] + return profile + +def extract_seqs(records): + seqs = {} + for id in records: + if not 'seq_data' in records[id]['data']: + continue + seqs[id] = records[id]['data']['seq_data'] + return seqs + +def write_gene_fastas(seq_data,work_dir): + d = 0 + files = {} + for id in seq_data: + record = seq_data[id] + locus_name = record['locus_name'] + if 'dna_seq' in record: + seq = record['dna_seq'] + else: + seq = record['aa_seq'] + out_file = os.path.join(work_dir, f"{locus_name}.fas") + if not os.path.isfile(out_file): + oh = open(out_file,'w') + files[locus_name] = {'file':out_file} + else: + oh = open(out_file,'a') + seq_name = f'{locus_name}|{id}|{d}' + oh.write(f'>{seq_name}\n{seq}\n') + oh.close() + d+=1 + return files +def run_merge(config): + analysis_parameters = config -def run(cmd_args=None): - if cmd_args is None: - parser = add_args() - cmd_args = parser.parse_args() - analysis_parameters = vars(cmd_args) #Input Parameters - input_files = cmd_args.input[0] - outdir = cmd_args.outdir - force = cmd_args.force - + input_files = config['input'][0] + outdir = config['outdir'] + perform_align = config['align'] + n_threads = 
config['n_threads'] + force = config['force'] run_data = {} run_data['analysis_start_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") - run_data['parameters'] = vars(cmd_args) + run_data['parameters'] = analysis_parameters if os.path.isdir(outdir) and not force: print(f'Error {outdir} exists, if you would like to overwrite, then specify --force') @@ -78,15 +115,59 @@ def run(cmd_args=None): if not os.path.isdir(outdir): os.makedirs(outdir, 0o755) + #perform merge file_list = get_file_list(input_files) records = read_file_list(file_list) - df = pd.DataFrame.from_dict(records, orient='index') + #create profile + df = pd.DataFrame.from_dict(extract_profiles(records), orient='index') df.insert(loc=0, column='sample_id', value=df.index.tolist()) df.to_csv(os.path.join(outdir,'profile.tsv'),index=False,header=True,sep="\t") + #create alignment + if perform_align: + pass + work_dir = os.path.join(outdir,"raw_gene_fastas") + if not os.path.isdir(work_dir): + os.makedirs(work_dir, 0o755) + + seq_data = extract_seqs(records) + gene_files = write_gene_fastas(seq_data,work_dir) + pool = Pool(processes=n_threads) + + results = [] + for locus_name in gene_files: + results.append(pool.apply_async(align, args=((gene_files[locus_name]['file'],)))) + + pool.close() + pool.join() + for locus_name in gene_files: + parse_align(align) + + + + +def run(cmd_args=None): + #cmd_args = parse_args() + if cmd_args is None: + parser = add_args() + cmd_args = parser.parse_args() + analysis_parameters = vars(cmd_args) + config_file = cmd_args.config + + config = {} + if config_file is not None: + with open(config_file) as fh: + config = json.loads(fh.read()) + + for p in analysis_parameters: + if not p in config: + config[p] = analysis_parameters[p] + + run_merge(config) + # call main function if __name__ == '__main__': From 42dd4b3f98e7715db3cbc61ca288c97072f2096e Mon Sep 17 00:00:00 2001 From: James Date: Mon, 22 Apr 2024 13:04:17 -0400 Subject: [PATCH 18/51] merge included mafft alignment 
and production of concatenated alignment --- locidex/merge.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/locidex/merge.py b/locidex/merge.py index b1cb87b..a29267d 100644 --- a/locidex/merge.py +++ b/locidex/merge.py @@ -21,6 +21,8 @@ def add_args(parser=None): parser.add_argument('-o', '--outdir', type=str, required=True, help='Output file to put results') parser.add_argument('--n_threads','-t', type=int, required=False, help='CPU Threads to use', default=1) + parser.add_argument('--linker','-l', type=str, required=False, + help='Linker sequence for alignment', default='NNNNNNNNNNNNNNNNNNNN') parser.add_argument('-V', '--version', action='version', version="%(prog)s " + __version__) parser.add_argument('-a', '--align', required=False, help='Perform alignment with individual loci to produce a concatenated alignment', action='store_true') @@ -100,6 +102,7 @@ def run_merge(config): input_files = config['input'][0] outdir = config['outdir'] perform_align = config['align'] + linker_seq = config['linker'] n_threads = config['n_threads'] force = config['force'] @@ -125,9 +128,12 @@ def run_merge(config): column='sample_id', value=df.index.tolist()) df.to_csv(os.path.join(outdir,'profile.tsv'),index=False,header=True,sep="\t") + sample_names = list(df['sample_id']) + del(df) + run_data['result_file'] = os.path.join(outdir,"profile.tsv") #create alignment - if perform_align: + if perform_align and len(records) > 1: pass work_dir = os.path.join(outdir,"raw_gene_fastas") if not os.path.isdir(work_dir): @@ -143,8 +149,28 @@ def run_merge(config): pool.close() pool.join() - for locus_name in gene_files: - parse_align(align) + + loci_names = list(gene_files.keys()) + alignment = {} + for i in range(0,len(results)): + alignment[loci_names[i]] = parse_align(results[i]) + results[i] = None + del(results) + + out_align = os.path.join(outdir,'loci_alignment.fas') + oh = open(out_align,'w') + for sample_id in sample_names: + 
seq = [] + for locus_name in loci_names: + seq.append(alignment[locus_name][sample_id]) + seq.append(linker_seq) + oh.write('>{}\n{}'.format(sample_id,"".join(seq))) + oh.close() + run_data['alignment_file'] = out_align + + run_data['analysis_end_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") + with open(os.path.join(outdir,"run.json"),'w' ) as fh: + fh.write(json.dumps(run_data, indent=4)) From 04717623c4b0071dd833e1f175721b3fa85c31fc Mon Sep 17 00:00:00 2001 From: James Date: Mon, 22 Apr 2024 13:10:30 -0400 Subject: [PATCH 19/51] added missing input file protection --- locidex/merge.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/locidex/merge.py b/locidex/merge.py index a29267d..5fee8c4 100644 --- a/locidex/merge.py +++ b/locidex/merge.py @@ -39,11 +39,18 @@ def get_file_list(input_files): if re.search(".json$", input_files[0]) or re.search(".json.gz$", input_files[0]): file_list = input_files else: + if not os.path.isfile(input_files[0]): + print(f'Error the supplied file {input_files[0]} does not exist') + sys.exit() encoding = guess_type(input_files[0])[1] _open = partial(gzip.open, mode='rt') if encoding == 'gzip' else open with _open(input_files[0]) as f: for line in f: - file_list.append(line.rstrip()) + line = line.rstrip() + if not os.path.isfile(line): + print(f'Error the supplied file {line} does not exist') + sys.exit() + file_list.append(line) return file_list def read_file_list(file_list): @@ -167,7 +174,7 @@ def run_merge(config): oh.write('>{}\n{}'.format(sample_id,"".join(seq))) oh.close() run_data['alignment_file'] = out_align - + run_data['analysis_end_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") with open(os.path.join(outdir,"run.json"),'w' ) as fh: fh.write(json.dumps(run_data, indent=4)) @@ -181,13 +188,9 @@ def run(cmd_args=None): parser = add_args() cmd_args = parser.parse_args() analysis_parameters = vars(cmd_args) - config_file = cmd_args.config - config = {} - if config_file is not 
None: - with open(config_file) as fh: - config = json.loads(fh.read()) + config = {} for p in analysis_parameters: if not p in config: config[p] = analysis_parameters[p] From b4bf401995716601d5a368795e4b3d9bd205ca47 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 22 Apr 2024 13:30:31 -0400 Subject: [PATCH 20/51] added db_version validation --- locidex/merge.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/locidex/merge.py b/locidex/merge.py index 5fee8c4..16d360e 100644 --- a/locidex/merge.py +++ b/locidex/merge.py @@ -24,6 +24,8 @@ def add_args(parser=None): parser.add_argument('--linker','-l', type=str, required=False, help='Linker sequence for alignment', default='NNNNNNNNNNNNNNNNNNNN') parser.add_argument('-V', '--version', action='version', version="%(prog)s " + __version__) + parser.add_argument('-s', '--strict', required=False, help='Only merge data produces by the same db', + action='store_true') parser.add_argument('-a', '--align', required=False, help='Perform alignment with individual loci to produce a concatenated alignment', action='store_true') parser.add_argument('-f', '--force', required=False, help='Overwrite existing directory', @@ -53,8 +55,9 @@ def get_file_list(input_files): file_list.append(line) return file_list -def read_file_list(file_list): +def read_file_list(file_list,perform_validation=False): records = {} + db_info = {} for f in file_list: if not os.path.isfile(f): continue @@ -62,6 +65,16 @@ def read_file_list(file_list): _open = partial(gzip.open, mode='rt') if encoding == 'gzip' else open with _open(f) as fh: data = json.load(fh) + if 'db_info' not in data: + print("Error malformed or invalid input file {}, missing 'db_info'".format(f)) + sys.exit() + if len(db_info) == 0: + db_info = data['db_info'] + if db_info["db_name"] != data['db_info']["db_name"] or \ + db_info["db_version"] != data['db_info']["db_version"]: + print("Error you are attempting to merge files generated with different 
databases {} vs. {}: {}".format(db_info,f,data['db_info'])) + sys.exit() + records[data['data']['sample_name']] = data return records @@ -112,6 +125,9 @@ def run_merge(config): linker_seq = config['linker'] n_threads = config['n_threads'] force = config['force'] + validate_db = config['strict'] + if validate_db is None or validate_db == '': + validate_db = False run_data = {} @@ -127,7 +143,7 @@ def run_merge(config): #perform merge file_list = get_file_list(input_files) - records = read_file_list(file_list) + records = read_file_list(file_list,perform_validation=validate_db) #create profile df = pd.DataFrame.from_dict(extract_profiles(records), orient='index') From 1dae7cf8b67ff5b6dd29e13e329497e1759c890c Mon Sep 17 00:00:00 2001 From: James Date: Mon, 22 Apr 2024 19:04:56 -0400 Subject: [PATCH 21/51] updated search to use manifest, created manifest module --- locidex/manifest.py | 106 ++++++++++++++++++++++++++++++++++++++++++++ locidex/merge.py | 81 ++++++++++++++++++++++++--------- locidex/report.py | 1 - 3 files changed, 167 insertions(+), 21 deletions(-) create mode 100644 locidex/manifest.py diff --git a/locidex/manifest.py b/locidex/manifest.py new file mode 100644 index 0000000..0e31e40 --- /dev/null +++ b/locidex/manifest.py @@ -0,0 +1,106 @@ +import pathlib +import json +import os +import re +import sys +from argparse import (ArgumentParser, ArgumentDefaultsHelpFormatter, RawDescriptionHelpFormatter) +from datetime import datetime +from locidex.version import __version__ + + +def add_args(parser=None): + if parser is None: + parser = ArgumentParser( + description="Locidex manifest: Setup directory of databases for use with search") + parser.add_argument('-i','--input', type=str, required=True,help='Input directory of locidex databases') + parser.add_argument('-V', '--version', action='version', version="%(prog)s " + __version__) + return parser + +def run_merge(config): + analysis_parameters = config + + #Input Parameters + input_dir = config['input'] + 
in_dirname = input_dir.split('/')[-1] + run_data = {} + run_data['analysis_start_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") + run_data['parameters'] = analysis_parameters + + db_keys = [ + "db_name", + "db_version", + "db_date", + "db_author", + "db_desc", + "db_num_seqs", + ] + + d = pathlib.Path(input_dir).rglob('*') + config_files = {} + for item in d: + if item.is_dir(): + continue + fpath = item.resolve() + dirname = os.path.dirname(fpath).split('/')[-1] + fname = os.path.basename(item) + if fname != 'config.json': + continue + c = {} + with open(fpath ,'r') as fh: + c = json.load(fh) + if len(c) == 0: + continue + for field in db_keys: + if not field in c: + print(f'Error db config: {fpath} is missing a needed field key for {field}, please set one') + sys.exit() + + v = c[field] + if v == '': + print(f'Error db config: {fpath} is missing a needed field value for {field}, please set one') + sys.exit() + + db_name = str(c['db_name']) + db_version = str(c['db_version']) + if not db_name in config_files: + config_files[db_name] = {} + if db_version in config_files[db_name]: + print(f"Error you are trying to populate duplicate entries for db_name {db_name} and version {db_version}. 
\ + Manifest only supports distinct db_entries, please resolve duplicates") + sys.exit() + + config_files[db_name][db_version] = { + 'db_relative_path_dir': f"{in_dirname}/{dirname}", + 'db_relative_path_config': f"{in_dirname}/{dirname}/config.json", + } + + with open(os.path.join(input_dir,"manifest.json"),'w' ) as fh: + fh.write(json.dumps(config_files, indent=4)) + + run_data['analysis_end_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") + with open(os.path.join(input_dir,"run.json"),'w' ) as fh: + fh.write(json.dumps(run_data, indent=4)) + + + +def run(cmd_args=None): + #cmd_args = parse_args() + if cmd_args is None: + parser = add_args() + cmd_args = parser.parse_args() + analysis_parameters = vars(cmd_args) + + + config = {} + for p in analysis_parameters: + if not p in config: + config[p] = analysis_parameters[p] + + run_merge(config) + + +# call main function +if __name__ == '__main__': + run() + + diff --git a/locidex/merge.py b/locidex/merge.py index 16d360e..443b6f1 100644 --- a/locidex/merge.py +++ b/locidex/merge.py @@ -60,7 +60,8 @@ def read_file_list(file_list,perform_validation=False): db_info = {} for f in file_list: if not os.path.isfile(f): - continue + print(f"Error cannot open input file {f}") + sys.exit encoding = guess_type(f)[1] _open = partial(gzip.open, mode='rt') if encoding == 'gzip' else open with _open(f) as fh: @@ -74,7 +75,6 @@ def read_file_list(file_list,perform_validation=False): db_info["db_version"] != data['db_info']["db_version"]: print("Error you are attempting to merge files generated with different databases {} vs. 
{}: {}".format(db_info,f,data['db_info'])) sys.exit() - records[data['data']['sample_name']] = data return records @@ -90,6 +90,7 @@ def extract_seqs(records): for id in records: if not 'seq_data' in records[id]['data']: continue + seqs[id] = records[id]['data']['seq_data'] return seqs @@ -98,21 +99,30 @@ def write_gene_fastas(seq_data,work_dir): files = {} for id in seq_data: record = seq_data[id] - locus_name = record['locus_name'] - if 'dna_seq' in record: - seq = record['dna_seq'] - else: - seq = record['aa_seq'] - out_file = os.path.join(work_dir, f"{locus_name}.fas") - if not os.path.isfile(out_file): - oh = open(out_file,'w') - files[locus_name] = {'file':out_file} - else: - oh = open(out_file,'a') - seq_name = f'{locus_name}|{id}|{d}' - oh.write(f'>{seq_name}\n{seq}\n') - oh.close() - d+=1 + for seq_id in record: + if 'locus_name' not in record[seq_id]: + continue + locus_name = record[seq_id]['locus_name'] + if locus_name == '': + continue + if 'dna_seq' in record[seq_id]: + seq = record[seq_id]['dna_seq'] + else: + seq = record[seq_id]['aa_seq'] + out_file = os.path.join(work_dir, f"{locus_name}.fas") + if not os.path.isfile(out_file): + oh = open(out_file,'w') + files[locus_name] = {'file':out_file} + else: + if not locus_name in files: + oh = open(out_file,'w') + files[locus_name] = {'file':out_file} + else: + oh = open(out_file,'a') + seq_name = f'{id}' + oh.write(f'>{seq_name}\n{seq}\n') + oh.close() + d+=1 return files def run_merge(config): @@ -164,6 +174,8 @@ def run_merge(config): seq_data = extract_seqs(records) gene_files = write_gene_fastas(seq_data,work_dir) + del(records) + del(seq_data) pool = Pool(processes=n_threads) results = [] @@ -173,24 +185,53 @@ def run_merge(config): pool.close() pool.join() + r = [] + for x in results: + if isinstance(x, dict): + r.append(x) + else: + r.append(x.get()) + results = r loci_names = list(gene_files.keys()) alignment = {} + + for i in range(0,len(results)): - alignment[loci_names[i]] = 
parse_align(results[i]) + alignment[loci_names[i]] = parse_align(results[i][0]) results[i] = None del(results) + loci_lengths = {} + for sample_id in sample_names: + for locus_name in loci_names: + if sample_id not in alignment[locus_name]: + continue + loci_lengths[locus_name] = len(alignment[locus_name][sample_id]) + + out_align = os.path.join(outdir,'loci_alignment.fas') oh = open(out_align,'w') + invalid_loci = set() for sample_id in sample_names: seq = [] for locus_name in loci_names: - seq.append(alignment[locus_name][sample_id]) + print(locus_name) + if locus_name not in loci_lengths: + invalid_loci.add(locus_name) + continue + if sample_id in alignment[locus_name]: + seq.append(alignment[locus_name][sample_id]) + else: + seq.append(''.join(['-']*loci_lengths[locus_name])) seq.append(linker_seq) - oh.write('>{}\n{}'.format(sample_id,"".join(seq))) + oh.write('>{}\n{}\n'.format(sample_id,"".join(seq))) oh.close() run_data['alignment_file'] = out_align + run_data['count_valid_loci'] = len(loci_lengths.keys()) + run_data['count_invalid_loci'] = len(list(invalid_loci)) + run_data['valid_loci'] = ",".join(list(loci_lengths.keys())) + run_data['invalid_loci'] = ",".join(list(invalid_loci)) run_data['analysis_end_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") with open(os.path.join(outdir,"run.json"),'w' ) as fh: fh.write(json.dumps(run_data, indent=4)) diff --git a/locidex/report.py b/locidex/report.py index c4acafe..ac4469b 100644 --- a/locidex/report.py +++ b/locidex/report.py @@ -406,7 +406,6 @@ def run_report(config): look_up = {} for locus_name in profile['data']['profile'][sample_name]: h = profile['data']['profile'][sample_name][locus_name] - print(h) if h not in look_up: look_up[h] = [] look_up[h].append(locus_name) From 5a68dd646330021be0d0d59a9328509a2276f60f Mon Sep 17 00:00:00 2001 From: James Date: Mon, 22 Apr 2024 19:49:03 -0400 Subject: [PATCH 22/51] removed print --- locidex/merge.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/locidex/merge.py b/locidex/merge.py index 443b6f1..552f4f5 100644 --- a/locidex/merge.py +++ b/locidex/merge.py @@ -215,7 +215,6 @@ def run_merge(config): for sample_id in sample_names: seq = [] for locus_name in loci_names: - print(locus_name) if locus_name not in loci_lengths: invalid_loci.add(locus_name) continue From 12a3b017efa3665fbd93fbe6a9cf0c2723711532 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 23 Apr 2024 09:27:02 -0400 Subject: [PATCH 23/51] removed blank space --- locidex/merge.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/locidex/merge.py b/locidex/merge.py index 552f4f5..9be231a 100644 --- a/locidex/merge.py +++ b/locidex/merge.py @@ -236,8 +236,6 @@ def run_merge(config): fh.write(json.dumps(run_data, indent=4)) - - def run(cmd_args=None): #cmd_args = parse_args() if cmd_args is None: From 367fbdd063f591de51f7e8c7b2788013095da2a2 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 23 Apr 2024 09:29:02 -0400 Subject: [PATCH 24/51] added manifest module --- locidex/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/locidex/main.py b/locidex/main.py index 4f4088f..bebbc1c 100644 --- a/locidex/main.py +++ b/locidex/main.py @@ -2,7 +2,7 @@ import sys import argparse -from . import format, extract, report, merge, search, build +from . 
import format, extract, report, merge, search, build, manifest tasks = { 'search': (search, 'Query set of Loci/Genes against a database to produce a sequence store for downstream processing'), @@ -11,6 +11,7 @@ 'merge': (merge, 'Merge a set of gene profiles into a standard profile format'), 'format': (format, 'Format fasta files from other MLST databases for use with locidex build'), 'build': (build, 'Build a locidex database'), + 'manifest': (manifest, 'Create a multi-database folder manifest'), } From 814f1de3407f0423ed35425e9db873bce50ef813 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 23 Apr 2024 09:29:30 -0400 Subject: [PATCH 25/51] removed print --- locidex/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/locidex/main.py b/locidex/main.py index bebbc1c..487b19e 100644 --- a/locidex/main.py +++ b/locidex/main.py @@ -28,7 +28,6 @@ def main(argv=None): if args.command is None: parser.print_help() sys.exit() - #print("args", tasks[args.command][module_idx].run(args)) tasks[args.command][module_idx].run(args) From 4ebee55042c427001ed72c0fd991fedb20e5e2d2 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 23 Apr 2024 10:03:58 -0400 Subject: [PATCH 26/51] changed date format --- locidex/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/locidex/build.py b/locidex/build.py index 9f624a4..8b31fa6 100644 --- a/locidex/build.py +++ b/locidex/build.py @@ -179,7 +179,7 @@ def run(cmd_args=None): config["db_desc"] = cmd_args.db_desc config["db_author"] = cmd_args.author if cmd_args.date == '': - config["db_date"] = datetime.now().strftime("%d/%m/%Y") + config["db_date"] = datetime.now().strftime("%Y/%d/%m") if not os.path.isfile(input_file): print(f'Error {input_file} does not exist, please check path and try again') From 693d7dd2f2ba2627b4c038ef082b9da89e4f0020 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 23 Apr 2024 10:28:55 -0400 Subject: [PATCH 27/51] updated db config to respect which db type is enabled --- locidex/classes/db.py | 6 
++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/locidex/classes/db.py b/locidex/classes/db.py index 73204e2..1e24393 100644 --- a/locidex/classes/db.py +++ b/locidex/classes/db.py @@ -85,8 +85,10 @@ def __init__(self, input_dir,db_basenames,config_required_fields,): p = self.get_file_path_by_name('blast','dir') - self.blast_paths['nucleotide'] = os.path.join(p,"nucleotide/nucleotide") - self.blast_paths['protein'] = os.path.join(p, "protein/protein") + if self.config_obj.config["is_nucl"]: + self.blast_paths['nucleotide'] = os.path.join(p,"nucleotide/nucleotide") + if self.config_obj.config["is_prot"]: + self.blast_paths['protein'] = os.path.join(p, "protein/protein") if len(self.blast_paths) == 0: From 2ea2ba70fef04b941da1eb38b19a0200bdd90c77 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 23 Apr 2024 10:34:50 -0400 Subject: [PATCH 28/51] corrected path issue with output seq_store --- locidex/search.py | 65 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/locidex/search.py b/locidex/search.py index af814c8..b8bd5e6 100644 --- a/locidex/search.py +++ b/locidex/search.py @@ -24,6 +24,8 @@ def add_args(parser=None): parser.add_argument('-n', '--name', type=str, required=False, help='Sample name to include default=filename') parser.add_argument('-d', '--db', type=str, required=False, help='Locidex database directory') parser.add_argument('-c', '--config', type=str, required=False, help='Locidex parameter config file (json)') + parser.add_argument('--db_name', type=str, required=False, help='Name of database to perform search, used when a manifest is specified as a db') + parser.add_argument('--db_version', type=str, required=False, help='Version of database to perform search, used when a manifest is specified as a db') parser.add_argument('--min_evalue', type=float, required=False, help='Minumum evalue required for match', default=0.0001) parser.add_argument('--min_dna_len', type=int, 
required=False, help='Global minumum query length dna', @@ -91,6 +93,8 @@ def run_search(config): sample_name = config['name'] perform_annotation = config['annotate'] max_target_seqs = config['max_target_seqs'] + db_name = config['db_name'] + db_version = config['db_version'] if 'max_ambig_count' in config: max_ambig_count = config['max_ambig_count'] else: @@ -107,6 +111,60 @@ def run_search(config): run_data['analysis_start_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") run_data['parameters'] = config + #check if user supplied a manifest of different databases + if os.path.isfile(db_dir): + if db_name is None or db_name == '': + print(f'You specified a file as the locidex db but no db_name to run, please specify a valid --db_name: {db_dir}') + sys.exit() + + with open(db_dir ,'r') as fh: + manifest = json.load(fh) + + if db_name not in manifest: + print(f'You specified a db name "{db_name}" which does not exist in the manifest file: {db_dir}') + print(f'list of keys in manifest: {list(manifest.keys())}') + sys.exit() + + if db_version is not None and db_version != '': + if db_version not in manifest[db_name]: + print(f'You specified a db name "{db_name}" and db version but the db version "{db_version}"was not found in the manifest {list(manifest[db_name].keys())} ') + sys.exit() + else: + version_codes = list(manifest[db_name].keys()) + if len(version_codes) == 1: + version_code = version_codes[0] + else: + latest_date = None + version_code = None + + for code in version_codes: + if not 'db_date' in manifest[db_name][code]: + print(f'Error db_date field missing from manifest for {db_name}, this field is required if more than 1 db version exists') + sys.exit() + db_date = manifest[db_name][code]['db_date'] + db_date = datetime.strptime(db_date, '%Y/%d/%m') + if version_code is None: + version_code = code + latest_date = db_date + continue + if db_date > latest_date: + latest_date = db_date + version_code = code + db_version = version_code + db_dir_prefix = 
str(os.path.dirname(db_dir)).split('/') + db_dir_rel_path = manifest[db_name][db_version]['db_relative_path_dir'].split('/') + if db_dir_prefix[-1] == db_dir_rel_path[0]: + db_dir_prefix = db_dir_prefix[0:-1] + db_dir_prefix = "/".join(db_dir_prefix) + db_dir_rel_path = "/".join(db_dir_rel_path ) + db_dir = os.path.join(db_dir_prefix,db_dir_rel_path) + + if not os.path.isdir(db_dir): + print(f'Error DB does not exist: {db_dir}') + sys.exit() + + + # Validate database is valid db_database_config = search_db_conf(db_dir, DB_EXPECTED_FILES, DB_CONFIG_FIELDS) if db_database_config.status == False: @@ -116,7 +174,6 @@ def run_search(config): metadata_path = db_database_config.meta_file_path metadata_obj = db_config(metadata_path, ['meta', 'info']) blast_database_paths = db_database_config.blast_paths - if os.path.isdir(outdir) and not force: print(f'Error {outdir} exists, if you would like to overwrite, then specify --force') sys.exit() @@ -190,7 +247,7 @@ def run_search(config): } store_obj = seq_store(sample_name, db_database_config.config_obj.config, metadata_obj.config['meta'], seq_obj.seq_data, BLAST_TABLE_COLS, hit_filters) - + print(store_obj.record) for db_label in blast_database_paths: label_col = 'index' if db_label == 'nucleotide': @@ -225,9 +282,9 @@ def run_search(config): store_obj.filter_hits() store_obj.convert_profile_to_list() - run_data['result_file'] = "seq_store.json" + run_data['result_file'] = os.path.join(outdir,"seq_store.json") del (filtered_df) - + print(store_obj.record) with open(run_data['result_file'], "w") as fh: fh.write(json.dumps(store_obj.record, indent=4)) From 0b1aeeda7f2854435e2eff07d14da46e3a135efd Mon Sep 17 00:00:00 2001 From: James Date: Tue, 23 Apr 2024 10:37:18 -0400 Subject: [PATCH 29/51] removed print --- locidex/search.py | 1 - 1 file changed, 1 deletion(-) diff --git a/locidex/search.py b/locidex/search.py index b8bd5e6..513d14f 100644 --- a/locidex/search.py +++ b/locidex/search.py @@ -284,7 +284,6 @@ def 
run_search(config): store_obj.convert_profile_to_list() run_data['result_file'] = os.path.join(outdir,"seq_store.json") del (filtered_df) - print(store_obj.record) with open(run_data['result_file'], "w") as fh: fh.write(json.dumps(store_obj.record, indent=4)) From 5c5a371df05f8604b8b055a0088088c07ca486fb Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 25 Apr 2024 10:42:23 -0500 Subject: [PATCH 30/51] updated test_db.py tests to pass --- tests/test_db.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_db.py b/tests/test_db.py index a27d015..668a016 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -4,6 +4,7 @@ import os from pathlib import Path from locidex.classes.db import db_config, search_db_conf +import locidex.constants as constants required_fields = ["database", "user", "password"] @@ -96,7 +97,7 @@ def temp_db_dir(tmp_path): protein_dir.mkdir() # Create mock files - config_file.write_text(json.dumps({"key": "value"})) + config_file.write_text(json.dumps({i: "value" for i in constants.DB_CONFIG_FIELDS})) meta_file.write_text(json.dumps({"meta": "data"})) (nucleotide_dir / "nucleotide").touch() (protein_dir / "protein").touch() @@ -110,7 +111,8 @@ def test_search_db_conf_initialization_and_blast_paths_setup(temp_db_dir): "config": "db_config.json", "meta": "db_meta.json" } - required_fields = ["key"] + #required_fields = ["key"] + required_fields = [*constants.DB_CONFIG_FIELDS] # Initialize search_db_conf search_conf = search_db_conf(str(temp_db_dir), db_basenames, required_fields) From d7e76df1b6d3da4a9903e7c755f9764bde1fb98a Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 25 Apr 2024 14:59:06 -0500 Subject: [PATCH 31/51] updated manifest tests --- .../blast/nucleotide/nucleotide.fasta | 106 ++ .../blast/nucleotide/nucleotide.ndb | Bin 0 -> 20480 bytes .../blast/nucleotide/nucleotide.nhr | Bin 0 -> 3435 bytes .../blast/nucleotide/nucleotide.nin | Bin 0 -> 800 bytes .../blast/nucleotide/nucleotide.njs | 
22 + .../blast/nucleotide/nucleotide.not | Bin 0 -> 644 bytes .../blast/nucleotide/nucleotide.nsq | Bin 0 -> 6297 bytes .../blast/nucleotide/nucleotide.ntf | Bin 0 -> 16384 bytes .../blast/nucleotide/nucleotide.nto | Bin 0 -> 216 bytes .../fails_author/blast/protein/protein.fasta | 106 ++ .../fails/fails_author/config.json | 12 + .../manifest_in/fails/fails_author/meta.json | 1181 +++++++++++++++++ .../fails/fails_author/results.json | 14 + .../blast/nucleotide/nucleotide.fasta | 106 ++ .../blast/nucleotide/nucleotide.ndb | Bin 0 -> 20480 bytes .../blast/nucleotide/nucleotide.nhr | Bin 0 -> 3435 bytes .../blast/nucleotide/nucleotide.nin | Bin 0 -> 800 bytes .../blast/nucleotide/nucleotide.njs | 22 + .../blast/nucleotide/nucleotide.not | Bin 0 -> 644 bytes .../blast/nucleotide/nucleotide.nsq | Bin 0 -> 6297 bytes .../blast/nucleotide/nucleotide.ntf | Bin 0 -> 16384 bytes .../blast/nucleotide/nucleotide.nto | Bin 0 -> 216 bytes .../fails_name/blast/protein/protein.fasta | 106 ++ .../manifest_in/fails/fails_name/config.json | 12 + .../manifest_in/fails/fails_name/meta.json | 1181 +++++++++++++++++ .../manifest_in/fails/fails_name/results.json | 14 + .../example/manifest_in/passes/manifest.json | 8 + .../blast/nucleotide/nucleotide.fasta | 106 ++ .../blast/nucleotide/nucleotide.ndb | Bin 0 -> 20480 bytes .../blast/nucleotide/nucleotide.nhr | Bin 0 -> 3435 bytes .../blast/nucleotide/nucleotide.nin | Bin 0 -> 800 bytes .../blast/nucleotide/nucleotide.njs | 22 + .../blast/nucleotide/nucleotide.not | Bin 0 -> 644 bytes .../blast/nucleotide/nucleotide.nsq | Bin 0 -> 6297 bytes .../blast/nucleotide/nucleotide.ntf | Bin 0 -> 16384 bytes .../blast/nucleotide/nucleotide.nto | Bin 0 -> 216 bytes .../pass_one_db/blast/protein/protein.fasta | 106 ++ .../passes/pass_one_db/config.json | 12 + .../manifest_in/passes/pass_one_db/meta.json | 1181 +++++++++++++++++ .../passes/pass_one_db/results.json | 14 + locidex/example/manifest_in/passes/run.json | 7 + 
locidex/example/manifest_out/manifest.json | 8 + locidex/example/manifest_out/run.json | 7 + locidex/manifest.py | 17 +- locidex/search.py | 4 +- 45 files changed, 4365 insertions(+), 9 deletions(-) create mode 100644 locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.fasta create mode 100644 locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.ndb create mode 100644 locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.nhr create mode 100644 locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.nin create mode 100644 locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.njs create mode 100644 locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.not create mode 100644 locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.nsq create mode 100644 locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.ntf create mode 100644 locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.nto create mode 100644 locidex/example/manifest_in/fails/fails_author/blast/protein/protein.fasta create mode 100644 locidex/example/manifest_in/fails/fails_author/config.json create mode 100644 locidex/example/manifest_in/fails/fails_author/meta.json create mode 100644 locidex/example/manifest_in/fails/fails_author/results.json create mode 100644 locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.fasta create mode 100644 locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.ndb create mode 100644 locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.nhr create mode 100644 locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.nin create mode 100644 locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.njs create mode 100644 locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.not create mode 
100644 locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.nsq create mode 100644 locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.ntf create mode 100644 locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.nto create mode 100644 locidex/example/manifest_in/fails/fails_name/blast/protein/protein.fasta create mode 100644 locidex/example/manifest_in/fails/fails_name/config.json create mode 100644 locidex/example/manifest_in/fails/fails_name/meta.json create mode 100644 locidex/example/manifest_in/fails/fails_name/results.json create mode 100644 locidex/example/manifest_in/passes/manifest.json create mode 100644 locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.fasta create mode 100644 locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.ndb create mode 100644 locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nhr create mode 100644 locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nin create mode 100644 locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.njs create mode 100644 locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.not create mode 100644 locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nsq create mode 100644 locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.ntf create mode 100644 locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nto create mode 100644 locidex/example/manifest_in/passes/pass_one_db/blast/protein/protein.fasta create mode 100644 locidex/example/manifest_in/passes/pass_one_db/config.json create mode 100644 locidex/example/manifest_in/passes/pass_one_db/meta.json create mode 100644 locidex/example/manifest_in/passes/pass_one_db/results.json create mode 100644 locidex/example/manifest_in/passes/run.json create mode 100644 locidex/example/manifest_out/manifest.json 
create mode 100644 locidex/example/manifest_out/run.json diff --git a/locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.fasta b/locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.fasta new file mode 100644 index 0000000..a03cb89 --- /dev/null +++ b/locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.fasta @@ -0,0 +1,106 @@ +>0 +AAATTCCGTCCCGGACATGCGGACTACACCTATCACCAAAAATACGGTGTGCGAGATTACCGTGGCGGCGGCCGTTCATCGGCACGTGAAACCGCCATGCGTGTTGCTGCGGGAGCGATTGCCAAAAAATATCTGCAGCAAGAGTTTGGCATTGAAGTGCGTGCTTACTTGTCGCAAATGGGGGATGTCGCGATTGATAAAGTGGATTGGAATGAGATTGAAAACAACGATTTCTTCTGTCCTGATGTCGATAAAGTGGCTGCGTTTGACGAGCTGATCCGCGAGCTGAAAAAAGAAGGCGATTCGATCGGCGCGAAAATCCAAGTGGTCGCTACAGGCGTGCCGGTTGGACTGGGTGAGCCTGTGTTTGATCGCTTAGATGCGGATATTGCCCATGCCTTGATGAGCATCAACGCCGTGAAAGGAGTCGAGATTGGTGATGGCTTTGATGTGGTGCGCCAAAAAGGCAGCCAACACCGTGACCCGCTCACTCCACAAGGT +>1 +GTTTTCCGCCCGGGCCATGCCGACTATACCTACGAGCAGAAATACGGTCTGCGCGATTACCGTGGCGGCGGTCGTTCTTCCGCCCGTGAAACGGCGATGCGCGTCGCGGCTGGCGCGATTGCTAAAAAATATCTGGCGGAGAAACACGGCATCGTCATTCAGGGGTGTCTGACCCAGATGGGCGATATTCCGCTTGAAATCAAAGACTGGCAGCAGGTTGAACAAAACCCGTTTTTCTGTCCTGATCCAGATAAAATCGACGCGCTGGATGAACTGATGCGCGCCCTGAAGAAAGAGGGCGATTCGATTGGGGCAAAAGTGACCGTCGTGGCAAACGGCGTTCCGGCCGGGCTTGGCGAACCGGTCTTTGACCGTCTGGATGCGGACATCGCTCATGCGCTGATGAGCATCAACGCGGTAAAAGGCGTGGAGATTGGCGATGGGTTTGATGTGGTCGCGTTGCGAGGCAGCCAGAATCGCGATGAAATTACCAAAGAGGGC +>2 +GTTTTCCGTCCAGGACACGCTGACTATACCTATGAGCAGAAATATGGCCTGCGCGACTACCGTGGCGGCGGACGTTCATCCGCGCGTGAAACGGCGATGCGCGTTGCGGCTGGCGCGATTGCCAAAAAATATCTGGCGGAAAAATTCGGCGTTGAAATTCGCGGCTGTCTGACGCAGATGGGGGATATTCCGCTGGAGATCAAAGACTGGTCTCAGGTGGAGCTTAACCCGTTCTTTTGTCCAGACCCGGATAAAATCGAAGTGCTGGACGAACTGATGCGCGGGCTGAAGAAAGAGGGCGACTCCATCGGGGCAAAAGTGACCGTTGTTGCAAGCGGCGTACCGGCGGGTCTCGGCGAACCTGTATTCGACCGTCTGGATGCCGACATCGCCCATGCGCTGATGAGCATTAACGCCGTTAAGGGCGTTGAGATTGGCGACGGTTTTGACGTTGTTGCGCTGCGCGGCAGTCAGAACCGCGATGAGATCACCAAAGAAGGT +>3 
+GTTTTCCGCCCAGGGCATGCTGATTATACCTATGAACAAAAATATGGTTTGCGTGATTATCGTGGTGGTGGACGTTCTTCTGCTCGTGAAACGGCAATGCGTGTCGCCGCAGGTGCGATTGCTAAAAAATATCTAAAAGAGAAATTAGGCATCGAAGTTCGAGGATATCTTTCTCAGCTAGGACCTATTACATGTGATCTTGTTGATTGGTCTATTGTTGAAAGCAATCCATTTTTCTGTCCTGATCCTTCACGTTTAGATGCGCTTGATGAATACATGCGTGCACTTAAAAAAGAAGGTAATTCTATTGGTGCAAAAGTCACTGTGGTTGCACAGGGTGTACCTGCTGGATTTGGTGAACCTGTCTTTGATCGATTAGATGCTGATTTAGCGCATGCTTTGATGAGTATCAATGCTGTCAAAGGTATAGAAATTGGTGATGGATTTGGTGTTGTAACATTAAAAGGTACAGAAAACCGAGATGAAATCACTAAAAAGGGA +>4 +GTTTTCCGTCCAGGCCATGCCGATTACACCTACGAACAAAAATACGGTCTGCGCGATTATCGCGGCGGCGGGCGCTCTTCCGCCCGCGAAACCGCCATGCGCGTGGCGGCAGGGGCGATTGCAAAAAAATATCTCGCCGAGAAATTTGGCATTGAGATTCGCGGCTGCCTGACCCAGATGGGTGACATTCCGCTGGAAATCAAAGACTGGTCGCAGGTCGAGCAAAATCCGTTTTTCTGCCCGGACCCGGACAAAATCGACGCGTTAGATGAACTGATGCGCGCGCTGAAAAAAGAGGGCGACTCCATCGGCGCGAAAGTCACCGTTGTTGCCAGTGGCGTCCCCGCCGGACTTGGCGAGCCGGTCTTTGACCGCCTGGATGCCGACATCGCCCATGCGCTGATGAGCATCAACGCGGTGAAAGGCGTAGAAATTGGTGATGGTTTTGACGTGGTGGCGCTGCGTGGCAGCCAGAACCGCGACGAAATCACCAAAGACGGT +>5 +GTTTTCCGTCCTGGTCACGCCGACTATACCTACGAACAAAAATATGGCTTTCGCGACTATCGCGGCGGCGGGCGTTCTTCCGCGCGTGAAACCGCGATGCGCGTGGCGGCAGGGGCAATTGCCAAAAAATATCTCCAGCAGAAATTCGGCATCGTTATCCGCGGCTGTCTGTCCCAGATGGGCGACATTCCGCTGGCAATCAAAGACTGGGATCAGGTAGAGCTCAACCCGTTCTTCTGCGCCGATGCCGACAAGCTGGACGCGCTGGATGAGCTGATGCGTGGCCTGAAAAAAGAGGGCGACTCCATTGGTGCGAAAGTCACCGTGGTGGCCGACGGCGTGCCGGCTGGCTGGGGCGAGCCGGTATTTGACCGCCTTGACGCCGACATCGCCCACGCGCTGATGAGCATCAACGCGGTGAAAGGCGTCGAAATCGGCGACGGTTTTGACGTGGTCAAGCTTCGCGGCAGCCAGAACCGCGACGAAATCACGAAGGCGGGT +>6 
+GTGTTCCGTCCGGGGCACGCGGATTACACCTACGAACAAAAATACGGCCTGCGCGACTATCGCGGCGGCGGGCGTTCATCCGCCCGTGAAACCGCCATGCGCGTCGCGGCAGGCGCTATCGCCAAAAAATATCTGGCGCAGAAATTCGGCGTGGTGATTCGCGGCTGCCTGACCCAGATGGGTGATATTCCGCTGGAAATCAAAGACTGGGATCAGGTAGAGCAAAACCCGTTCTTCTGCCCGGACCCGGATAAAATCGAGGCGCTGGATGAGCTGATGCGCGCTCTGAAAAAAGAGGGCGATTCCATCGGCGCGAAAGTCACCGTGGTGGCCGACAGCGTGCCCGCCGGGCTTGGCGAGCCGGTATTTGACCGCCTGGACGCCGATATCGCCCACGCGCTGATGAGCATTAACGCCGTGAAGGGCGTGGAAATCGGCGACGGTTTCGGCGTGGTGCAACTGCGCGGCAGCCAGAACCGCGACGAAATCACCACTGCCGGT +>7 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAGCCAGGCGCCACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGCGGCCTGCCGGAGGGCGCGGAGATTGCCGTTCAGTTGGAAGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGCCGCTTCTCGCTGTCTACGCTGCCTGCCGCCGATTTCCCGAATCTTGACGACTGGCAAAGCGAAGTTGAATTTACGCTGCCGCAGGCCACGATGAAGCGCCTGATTGAAGCGACCCAGTTTTCGATGGCCCATCAGGATGTGCGCTACTACTTAAACGGTATGCTGTTTGAAACGGAAGGTAGCGAACTGCGCACTGTTGCGACCGACGGCCACCGTCTGGCGGTGTGCTCAATGCCGCTGGAGGCGTCTTTACCTAGCCACTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGTGGCGAAAACCCGCTGCGCGTGCAG +>8 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAACCCGGCGCTACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCCTGCCGGAAGGGGCGGAAATCGCCGTTCAGCTGGAGGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGTCGCTTTTCGCTGTCTACCTTACCGGCAGCAGACTTCCCGAATCTGGATGACTGGCAAAGCGAAGTGGAATTCACGCTGCCTCAGGCGACGATGAAACGCTTGATTGAGGCCACCCAGTTTTCGATGGCCCATCAGGACGTGCGCTACTACCTGAACGGTATGTTGTTTGAAACGGAAGGAAGCGAACTGCGCACCGTCGCGACCGACGGCCACCGTCTGGCGGTCTGTTCAATGCCGCTGGAGGCCTCTTTACCGAGCCATTCAGTGATCGTACCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTTGACGGCGGTGAAAATCCACTGCGTGTACAG +>9 
+ATGGAAATGGTGGCGCGCGTTGCGTTGATTCAGCCTCATGAACCAGGCGCAACTACCGTCCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCTTGCCGGAAGGGGCTGAAATTGCCGTCCAGCTGGAAGGCGATCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTCTCGCTTTCCACGCTGCCTGCCGCCGATTTCCCTAATCTGGATGACTGGCAGAGCGAAGTCGAATTCACCCTGCCGCAGGCAACGATGAAGCGCCTGATTGAAGCCACCCAGTTCTCAATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAGACTGAAGGTGAAGAGTTGCGTACCGTCGCGACCGACGGTCACCGTCTGGCGGTCTGCTCTATGCCGGTCGGGCAATCTCTGCCTAACCATTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAGCTGATGCGTATGCTCGACGGCGGCGAAACCCCGCTGCGCGTACAG +>10 +ATGGAGATGGTGGCGCGCGTGGCGCTGATCCAGCCTCATGAACCTGGTGCGACCACCGTTCCGGCGCGTAAATTCTTCGATATTTGCCGTGGATTACCAGAAGGGGCGGAAATTGCCGTTCAACTGGAAGGCGACCGTATGCTGGTGCGTTCTGGCCGCAGCCGTTTCTCGCTGTCTACGCTGCCTGCCGCCGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTCGAATTCACCCTGCCACAGGCGACAATGAAGCGCCTGATTGAAGCCACGCAGTTTTCGATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAAACCGAAGGGGAAGAGTTGCGTACCGTGGCGACCGACGGTCACCGCCTGGCGGTCTGTTCAATGCCTGTCGGTCAGCCGTTGCCTAGCCATTCGGTGATCGTACCGCGTAAAGGTGTGATTGAACTGATGCGTATGCTCGACGGCGGCGATAACCCGCTGCGCGTGCAG +>11 +ATGGAAATGGTGGCACGCGTTGCGCTGGTTCAGCCGCACGAACCAGGGGCGACGACCGTTCCAGCGCGCAAATTCTTTGATATCTGCCGTGGTCTGCCTGAAGGCGCGGAAATTGCCGTGCAGCTGGAAGGTGAGCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTTTCGCTGTCTACCCTGCCAGCGGCGGATTTCCCGAATCTCGATGACTGGCAGAGCGAAGTCGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCGACCCAGTTTTCTATGGCGCATCAGGACGTTCGCTATTACTTAAACGGTATGCTGTTTGAAACCGAAGGTGAAGAACTGCGCACCGTGGCGACCGACGGCCACCGTCTGGCAGTCTGTTCAATGCCAATTGGTCAATCTTTGCCAAGCCATTCGGTGATCGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGCGGCGACAATCCGCTGCGCGTGCAG +>12 
+ATGGAAATGATCGCGCGCGTTACGCTGACTCAGCCGCACGACGCGGGCGCGACCACGGTTCCGGCACGTAAATTCTTTGATATTTGCCGTGGGCTGCCGGAAGGCGCTGAAATCGCAGTGCAGCTGGAGGGCGACCGCATGCTGGTGCGCTCTGGCCGCAGCCGTTTCTCCCTCTCCACGTTGCCCGCTGCGGACTTCCCGAACCTGGATGACTGGCAGAGCGAAGTTGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCCACGCAGTTCTCCATGGCGCATCAGGACGTTCGTTACTACTTAAACGGCATGCTGTTCGAAACCGAAGGTGAAGAGCTGCGTACCGTGGCGACCGACGGTCACCGTCTGGCGGTTTGTTCCATGCCGATTGGCGATTCACTGCCAAACCATTCGGTGATCGTACCGCGTAAAGGCGTAATTGAACTGATGCGTATGCTCGACGGCGGTGAAACGCCGCTGCGCGTGCAG +>13 +ATGGAGATGATCGCGCGTGTGGCGCTGTCGCTACCGCACCAGGCGGGCGCGACCACCGTGCCGGCGCGCAAATTCTTCGATATCTGCCGTGGCTTGCCGGAAGGGGCGGAAATCGCCGTTACGCTGGAAGGCGACAGAATGCTGGTGCGCTCCGGGCGCAGCCGCTTCTCGCTGTCTACGTTACCGGCGGCAGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTGGAGTTCACGCTCCCGCAGGCCACCATGAAGCGCCTGATCGAAGCGACCCAGTTCTCCATGGCCCATCAGGACGTGCGGTATTACCTGAACGGGATGCTGTTTGAAACCGAAGGCGAAGAGCTGCGCACCGTGGCGACTGACGGCCACCGTCTGGCGGTATGCGCGATGCCGGTAGGCCAACCGCTGCCAAACCATTCGGTGATTGTACCGCGTAAAGGCGTGCTGGAGCTGATGCGTATGCTCGATGGCGGCGACAGCCCGCTGCGCATTCAG +>14 +TCGGCGCTGACGGAAAACGATCTGGTCTTCGCCCTCTCGCAGCACGCCGTCACCTTTGCAGATGCCGAGCTTCAGCAACAAGGGAAAAGCTGGCCCTCCCTTCCGCGTTATTTTGCCATTGGTCGCACAACGGCGCTGGCGCTGCATACCGTTAGCGGTTTCAATATTCACTACCCTCTGGATCGGGAAATTAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGCGCGCTTATATTACGCGGCAATGGTGGCCGTGAGCTGATAGGTGAAACCCTGACAGCACGCGGAGCTGATGTCGATTTTTGTGAATGTTATCAACGCAGTGCAAAATATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCTCGTGGTGTGACCACGGTGGTTGTCACCAGCGGAGAGATGCTACAA +>15 +GCGGCGCTGGGGGAGAGCGATCTGTTGTTTGCCCTCTCGCAACACGCGGTTGCTTTTGCCCAATCACAGCTGCATCAGCAAGATCGTAAATGGCCCCGACTACCTACTTATTTCGCCATTGGACGCACCACCGCACTGGCGCTACATACCGTAAGCGGACAGAAGATTCTCTACCCGCAGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGTGCGCTGATATTACGTGGCAATGGCGGTCGTGAGCTAATTGGGGATACCCTGACGGCGCGCGGTGCTGAGGTCACTTTTTGTGAATGTTATCAACGATGCGCAATCCATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCCCGCGAGGTGACGACGGTCGTTGTTACCAGCGGTGAAATGTTGCAG +>16 
+GCGACGTTGACGGAAAACGATCTGGTTTTTGCCCTTTCACAGCACGCCGTCGCCTTTGCCCACGCCCAACTCCAGCGAGATGGTCGAAACTGGCCTGCGTCGCCGCGCTATTTCGCGATTGGTCGCACCACGGCGCTCGCCCTTCATACCGTTAGCGGGTTCGATATTCGTTATCCATTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGCGCGCTGATTTTGCGTGGCAATGGCGGTCGCGGTCGCGAACTGCTGGGCGAAACCCTGACAGCTCGCGGAGCCGAAGTCAGTTTTTGTGAATGTTATCAACGAAGTGCGAAACATTACGATGGCGCAGAAGAGGCGATGCGCTGGCACACTCGCGGCGTAACGACGCTTGTTGTCACCAGCGGCGAGATGTTGCAA +>17 +GCGGCGCTCACGGACAACGATCTGGTGTTCGCCCTCTCGCAACACGCCGTCGCCTTTGCCCACGCCCAACTGCAACAGCAGGAGCTGGACTGGCCTGTGCAACCACGCTACTTCGCCATCGGGCGCACAACGGCGCTGGCGCTGCATACCGTTAACGGATGCGATATTCGCTATCCTCTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGAGCGCTTATTTTACGGGGCAACGGCGGGCGTGAACTGTTAGGCAAAACCCTCACAGAACGCGGCGCTGAAGTCACCTTTTGTGAATGTTATCAACGCAGTGCAAAACATTACGATGGCGCGGAAGAGGCGATGCGCTGGCACTCTCGCGGCGTGACGACGATTGTTGTCACCAGCGGCGAAATGCTGCAA +>18 +GAAACACTTGGCGATAACGATCTGCTCTTTGCACTTTCTCAACATGCAGTGTCATTCGCCCATGCGCAGTTGCAACAGCAGGGGCTAAACTGGCCATCACTTCCGCATTATTTCGCTATTGGCCGTACTACCGCTCTCGCCCTGCACACCGTAAGCGGACATAAGATTCGCTATCCACAAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCGGAATTACAAAGTATTGCGGGAAAACGCGCACTTATTTTGCGCGGTAACGGCGGCCGTGAATTGATCGGTCAGACGCTGACATCACGTGGTGCCGACGTTACTTTTTGTGAATGTTATCAACGCAGTGCGAAGCATTACGATGGTGCGGAAGAAGCTATGCGCTGGCAGTCTCGCGGCGTAACAACCGTCGTTGTAACCAGCGGTGAAATGCTGCAA +>19 +CGTCTCTTGCAGGAAGGCGATCTGCTCTTTGCGCTGTCGCAGCATGCCGTGGAGTTTGCCCATGCGCAGCTGCAACAGCATGCCGTTAGCTGGCCTCACGCCCCCCGCTATTTCGCCATCGGGCGCACCACGGCGCTGGCCTTACATACCGCGAGCGGAATCGATGTTCGTTACCCGTTAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAACCATTGCCGGAAAGCGCGCGCTCATTTTGCGCGGCAACGGTGGCCGCGAACTGCTGGGCGAAACGCTGCGCGAACGCGGCGCAGACGTGACGTTTGTGGAGTGCTATCAGCGCTGTGCGAAACACTATGATGGCGCGGAAGAAGCAATGCGCTGGCACGCCCGCGGTATTAATACGCTGGTGGTCACCAGCGGTGAAATGTTACAA +>20 
+ATTGCGGGATGCCAGAAGGTGGTTCTGTGCTCGCCGCCACCCATCGCTGATGAAATCCTCTATGCGGCGCAACTGTGTGGCGTGCAGGAAATCTTTAACGTCGGCGGCGCGCAGGCGATTGCCGCTCTGGCCTTCGGCAGCGAGTCCGTACCGAAAGTGGATAAAATTTTTGGCCCCGGCAACGCCTTTGTAACCGAAGCCAAGCGTCAGGTCAGCCAGCGTCTCGACGGCGCGGCTATCGATATGCCAGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCAACACCGGATTTCGTCGCTTCTGACCTGCTCTCCCAGGCTGAGCACGGCCCGGATTCCCAGGTGATCCTGCTGACGCCGGATGCTGACATTGCCCGCAAGGTGGCGGAGGCGGTAGAACGTCAACTGGCGGAACTGCCGCGCGCGGGCACCGCCCGGCAGGCCCTGAGCGCCAGTCGTCTGATTGTGACCAAAGATTTAGCGCAGTGCGTC +>21 +ATTGCCGGATGCAAAAAAGTGGTGTTGTGCTCGCCACCGCCTATCGCGGATGAAATCCTTTACGCTGCGCAGCTGTGCGGCGTGCAGGAAATCTTCAACGTCGGCGGCGCCCAGGCCATTGCCGCTCTGGCGTTCGGCAGCGAATCCGTGCCAAAAGTGGACAAAATTTTTGGCCCCGGCAACGCGTTTGTCACCGAGGCGAAACGCCAGGTCAGCCAGCGTCTCGACGGCGCGGCAATTGATATGCCTGCCGGCCCTTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCCACGCCAGATTTCGTGGCGTCTGACCTGCTCTCTCAGGCGGAACACGGCCCGGATTCTCAGGTCATCCTGCTGACCCCGGATGCCGGTATTGCGCAGAACGTCGCAGAGGCCGTCGAACGCCAGTTAGCGGAGTTACCGCGTGCAGAAACGGCGCGTCAGGCATTAAGCGCCAGCCGTCTGATCGTGACGAAAGACTTAGCCCAGTGCGTC +>22 +ATTGCAGGCTGTAAAAAAGTGGTGTTGTGCTCTCCCCCACCTATCGCCGATGAAATTCTGTATGCTGCGCAGCTCTGCGGCGTACAGGATGTGTTTAACGTTGGGGGCGCACAAGCTATTGCCGCGCTGGCATTTGGCAGTGAATCCGTGCCGAAAGTGGACAAAATTTTTGGCCCCGGTAATGCCTTTGTGACCGAAGCCAAACGTCAGGTGAGTCAGCGTCTGGACGGCGCCGCCATCGATATGCCAGCAGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCCACGCCGGATTTCGTTGCCTCTGACTTACTCTCGCAGGCCGAACACGGCCCCGATTCCCAAGTGATCCTGCTGACGCCGGATGCCGGTATGGCCAGCCGGGTTGCTGAAGCAGTAGAACGCCAGCTTGCAGCGCTGCCACGCGCTGAAACCGCGCGGCAGGCGTTAAGCGCCAGTCGTCTGATTGTCACCCGCTCCCTTGCGCAATGCGTA +>23 
+ATTGCGGGCTGTAAAAAAGTGGTGCTGTGCTCACCGCCGCCGATTGCCGATGAGATCCTTTACGCGGCGCAGCTGTGCGGTGTGCAGGACGTGTTTAACGTCGGCGGCGCACAGGCCATTGCCGCGCTGGCGTTTGGTACAGAATCCGTGCCGAAAGTGGACAAAATCTTCGGGCCAGGTAACGCCTTTGTCACCGAGGCAAAACGTCAGGTGAGCCAGCGTCTGGACGGTGCGGCGATCGATATGCCCGCAGGCCCGTCGGAAGTGCTGGTGATTGCTGACAGCGGCGCAACGCCGGATTTCGTGGCTTCTGATTTGCTCTCCCAGGCTGAACACGGCCCGGACTCTCAGGTGATTTTACTGACGCCCGCTGCTGATATGGCGCGTCGCGTAGCCGAAGCTGTCGAACGCCAGCTGGCAGAACTGCCGCGAGCTGAAACCGCCCGCCAGGCACTGAACGCCAGCCGCCTGATCGTGACTAAAGATTTAGCGCAGTGCGTG +>24 +ATTGCCGGTTGTCAGAAGGTGGTGCTCTGCTCTCCTCCACCGATCGCCGATGAGATCCTGTACGCGGCGAAGCTGTGCGGCGTGCAGGCGATCTATAAAGTGGGCGGTGCGCAGGCGATTTCTGCCCTGGCGTTCGGAACAGTATCCATTCCTAAGGTCGACAAAATCTTTGGCCCGGGCAATGCCTACGTGACCGAGGCGAAGCGCCAGGTCAGCCAGCGTCTGGACGGCGCGGCGATTGATATGCCTGCCGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCTACACCGGATTTCGTGGCCTCTGACCTGCTCTCGCAGGCCGAGCACGGCCCTGACTCGCAGGTGATTTTACTGACGCCAGATGCCGACATGGCAAAACGCGTGGGCGACGCCGTTGAGCGTCAGCTGGCTGACCTGCCGCGGGCGGAAACGGCGCGTCAGGCGCTATCCGCCAGCCGCCTGATTGTGGCCCGCGATCTTGACCAGTGCATC +>25 +ATCGCCGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTACGCCGCGCAACTCTGTGGCGTGAAAGAAGTGTTTAACGTGGGTGGCGCACAGGCCATTGCCGCGCTGGCGCTGGGCACGGAGTCTATTCCAAAAGTCGATAAAATCTTTGGGCCGGGCAACGCCTATGTGACCGAAGCCAAGCGCCAGGTCAGCCAGCGTCTTGACGGCGCGGCAATCGATATGCCCGCCGGACCGTCCGAAGTATTGGTTATCGCCGACAGCGGCGCAACGCCGGATTTTGTCGCCTCCGACCTGCTTTCTCAGGCCGAGCACGGCCCAGACTCGCAGGTGATCCTGCTGACGCCGGACGCTAAGCTTGCCGAGGGCGTGGCCGAAGCCGTTGAACGCCAGCTCGCCGAGCTGTCCCGCGCCGACACCGCGCGTCAGGCGCTCTCCGCCAGCCGTTTAATCGTAGCGAAAGATCTGGCGCAGTGCGTG +>26 
+ATCGCGGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTATGCGGCGCGTTTGTGCGGGGTACAGCAGGTCTATCAGGTGGGCGGCGCTCAGGCCATCGCGGCGCTGGCGTTTGGCACCGAGACCGTACCCAAAGTGGACAAAATCTTCGGGCCGGGCAATGCGTTTGTCACCGAAGCCAAACGTCAGGTCAGCCAGCGGCTGGATGGCGCGGCGATTGATATGCCTGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGATAGCGGCGCGACCACGGATTTCGTGGCCTCGGATTTGCTGTCCCAGGCGGAACACGGCCCGGATTCGCAGGTGATCCTGCTGACACCGGACAGCGCCATGGCGCAGGCGGTGGCCGACGCGGTTGAGCGTCAACTCGCCGAACTGCCGCGCGCGGAAACAGCTCGCCAGGCGCTGGCGGAAAGCCGCCTGATTGTGGCGCGCGATTTAGCGCAGTGCGTG +>27 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAATTTTTGACATTCTGGATATTCCGCACCATGTCGAAGTGGTTTCTGCTCACCGTACCCCCGATAAACTGTTCAGCTTTGCCGAAAATGCTGAAGAAAACGGCTTTCAGGTAATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCAGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTTGGCGTTCCGGTACAAAGCGCTGCGCTAAGCGGTGTGGACAGTCTCTATTCTATTGTACAGATGCCGCGCGGTATTCCGGTTGGCACACTGGCCATCGGCAAAGCTGGCGCCGCTAACGCGGCGCTGCTGGCGGCGCAAATTCTGGCCACCCACGATAACGCACTGCATCAGCGCCTTCGCGAC +>28 +AGCGACTGGACTACCATGCAATTCGCCGCCGAAATTTTTGAAATTCTGGATGTTCCGCACCATGTAGAAGTGGTTTCCGCCCATCGAACCCCTGATAAACTGTTCAGCTTCGCCGAAACGGCGGAAGAGAACGGATATCACGTGATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACATTGGTGCCGGTACTCGGCGTTCCGGTACAAAGCGCAGCATTAAGCGGTGTGGATAGCCTTTACTCCATTGTTCAGATGCCGCGTGGCATTCCGGTGGGTACACTGGCTATCGGCAAAGCCGGGGCTGCGAACGCCGCGCTGCTGGCAGCGCAAATTTTGGCCACACACGATAATGCGCTGCACCAGCGCCTGAGCAAC +>29 +AGCGACTGGGCTACCATGCAGTTCGCCGCAGAAATCCTCGATATTCTGAACGTACCTCACCATGTTGAAGTGGTTTCCGCCCACCGCACGCCCGATAAACTGTTCAGCTTCGCCGAAGACGCCGAAAGCAACGGTTATCAGGTGATTATTGCCGGTGCCGGCGGCGCTGCGCACTTACCCGGAATGATTGCCGCCAAAACGCTGGTCCCGGTATTAGGTGTACCCGTCCAGAGCGCCGCATTAAGCGGTGTCGATAGCCTCTACTCCATCGTGCAGATGCCGCGCGGCATTCCGGTCGGTACGCTGGCGATCGGTAAAGCCGGTGCCGCTAACGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>30 
+AGCGACTGGGCTACCATGCAGTTCGCCGTCGAAATCTTCGAAATCCTGAATGTCCCGCACCACGTTGAAGTGGTTTCTGCTCACCGCACCCCCGATAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTATCAGGTGATTATTGCGGGCGCAGGCGGCGCAGCGCACCTGCCAGGCATGATTGCCGCCAAAACGCTGGTGCCGGTGCTGGGCGTGCCAGTACAGAGCGCCGCACTGAGCGGTGTCGATAGCCTCTACTCCATCGTACAAATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATTGGTAAAGCTGGCGCGGCAAACGCGGCATTACTGGCAGCACAAATTCTCGCGACTCACGATAAAGAGCTACACCAGCGTCTGAATGGC +>31 +AGCGACTGGGCTACCATGCAGTTTGCCGCCGAAATCTTCGATATCCTGAACGTTCCACACCACGTTGAAGTGGTTTCCGCACACCGCACCCCCGATAAGCTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAAGGGTTATCAGGTGATTATTGCCGGTGCTGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTGGGCGTGCCGGTGCAAAGCGCTGCGCTGAGCGGCGTGGACAGCCTCTACTCTATCGTCCAGATGCCGCGCGGCATTCCGGTCGGCACGCTGGCGATCGGCAAAGCGGGCGCGGCGAACGCGGCGTTACTGGCAGCGCAAATTCTGGCGACACACGATAAAGACCTGCGCCAACGTCTGGCGGAC +>32 +AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTCGAAATGCTGGACGTTCCGCACCATGTTGAAGTCGTCTCAGCCCACCGTACCCCTGATAAACTGTTCAGCTTCGCCGAAAGCGCTGAAGAAAACGGTTATCAGGTTATTATTGCGGGTGCTGGCGGTGCAGCGCATCTGCCGGGCATGATTGCAGCGAAAACGCTGGTCCCCGTGTTAGGCGTTCCGGTACAAAGCGCAGCGTTGAGCGGCGTAGATAGCCTCTACTCAATCGTGCAGATGCCACGCGGCATCCCCGTGGGTACGCTGGCGATTGGGAAAGCGGGTGCGGCAAATGCGGCCCTGCTGGCAGCACAAATTCTGGCAACACACGACAAAGCATTACATCAGCGTCTGAGCGAC +>33 +AGTGACTGGGCAACCATGTCTCATGCCGCAGATGTATTAGATACACTACAAATTCCTTACCATGTTGAGATTGTCTCTGCACACCGAACCCCTGATAAGTTATTTAGTTTTGCTGAAAAAGCAAAAAGTAATGGCTTTGATGTCATTATTGCTGGTGCAGGAGGAGCTGCCCATTTACCAGGAATGCTTGCAGCTAAAACGTTAGTACCCGTATTTGGTGTTCCTGTTCAAAGTGCGACATTAAGCGGTGTTGATAGCCTCTATTCAATCGTACAAATGCCAAAAGGTATCCCTGTAGGAACCTTAGCGATTGGTAAAGCAGGGGCTGCCAATGCGGCTTTATTAGCGGCTCAAGTTTTAGCGTTACATTCTCCTGCTATTTTAGATGCATTGACTGCA +>34 
+AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTTGAAATCCTGAATGTTCCGCACCACGTCGAAGTGGTTTCCGCACACCGTACCCCGGACAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTACGAGGTGATCATTGCCGGTGCGGGCGGCGCAGCACATCTGCCGGGCATGATTGCCGCCAAAACGCTGGTGCCGGTACTGGGTGTTCCCGTGCAAAGCGCCGCGTTAAGCGGGGTGGATAGCCTTTACTCTATTGTCCAGATGCCGCGCGGTATTCCTGTCGGTACCCTGGCGATTGGTAAAGCAGGTGCGGCAAATGCCGCCCTGCTGGCCGCGCAGATCCTGGCGACGCATGATAAAGATTTGCACCAGCGTCTGGCGGAG +>35 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAACGGCGGAAGAGAACGGATATCAAGTGATTATTGCCGGCGCGGGCGGCGCGGCGCACCTGCCGGGAATGATTGCGGCAAAAACGCTGGTCCCGGTACTCGGCGTGCCGGTACAAAGCGCTGCGCTAAGCGGCGTGGATAGCCTTTACTCCATTGTGCAGATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATCGGTAAAGCCGGTGCGGCTAATGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>36 +AGCGACTGGGCCACCATGCAGCATGCCGCTGAAATTCTTGATGCCCTTGATGTTCCTTACCATGTTGAAGTGGTTTCCGCTCACCGCACGCCTGATAAGCTTTTCAGCTTTGCTGAATCCGCGCAGCACAACGGTTATCAGGTGATTATTGCTGGCGCAGGCGGTGCGGCGCATCTGCCGGGCATGATCGCCGCGAAAACCCTGGTGCCGGTATTAGGCGTGCCGGTGCAAAGCGCGGCCCTGAGCGGCGTGGACAGCCTCTACTCTATCGTGCAAATGCCGCGCGGCATTCCGGTAGGGACGCTGGCGATCGGCAAAGCGGGTGCTGCAAACGCCGCACTGCTGGCGGCGCAGATCCTCGCCCAGCATGACGATGCGCTACTGGCGCGTCTGGCGGCA +>37 +AAACGCTTCCTGAACGAACTGACCGCCGCTGAAGGGCTGGAACGTTATCTGGGCGCCAAATTCCCGGGTGCGAAACGTTTCTCGCTCGAGGGGGGAGATGCGCTGATACCTATGCTGAAAGAGATGGTTCGCCATGCGGGTAACAGCGGCACTCGCGAAGTGGTGCTGGGGATGGCGCACCGCGGTCGTCTGAACGTGCTGATCAACGTACTGGGTAAAAAACCGCAGGATCTGTTCGACGAGTTTGCCGGTAAACATAAAGAACATCTGGGTACCGGCGACGTGAAGTATCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGTCTGGTTCACCTGGCGCTGGCGTTTAACCCATCGCATCTGGAAATTGTGAGCCCGGTGGTGATGGGCTCCGTGCGCGCCCGTCTGGACCGACTGGACGAACCGAGCAGTAATAAAGTGCTGCCGATCACTATTCACGGCGACGCCGCGGTGACCGGCCAGGGCGTGGTTCAG +>38 
+AAACGCTTCCTGAACGAACTGACCGCTGCAGAAGGGCTGGAACGTTATCTGGGGGCAAAATTCCCTGGCGCGAAACGTTTTTCGCTGGAAGGCGGCGATGCGTTAATTCCGATGCTCAAAGAGATGGTCCGCCATGCGGGCAACAGCGGCACCCGCGAAGTGGTGTTGGGAATGGCGCACCGTGGTCGCCTGAACGTACTGGTCAACGTGCTGGGTAAAAAACCTCAGGATCTGTTTGACGAGTTTGCCGGTAAACATAAAGAACATTTGGGCACCGGCGACGTGAAGTACCATATGGGTTTCTCGTCGGATATCGAAACCGAAGGCGGACTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTCAGCCCGGTAGTGATGGGGTCTGTGCGCGCACGTCTCGACCGGCTCGACGAACCGAGCAGCAACAAAGTGTTGCCAATCACCATTCATGGTGATGCAGCAGTTACCGGGCAGGGCGTGGTTCAG +>39 +AAACGCTTCTTAAGCGAACTGACCGCCGCTGAAGGCCTTGAACGTTACCTCGGCGCAAAATTCCCTGGCGCAAAACGCTTCTCGCTGGAAGGCGGTGACGCGTTAATCCCGATGCTTAAAGAGATGATCCGCCACGCTGGCAACAGCGGCACCCGCGAAGTGGTTCTCGGGATGGCGCACCGTGGTCGTCTGAACGTGCTGGTGAACGTGCTGGGTAAAAAACCGCAAGACTTGTTCGACGAGTTCGCCGGTAAACATAAAGAACACCTCGGCACGGGTGACGTGAAATACCACATGGGCTTCTCGTCTGACTTCCAGACCGATGGCGGCCTGGTGCACCTGGCGCTGGCGTTTAACCCGTCTCACCTTGAGATTGTAAGCCCGGTAGTTATCGGTTCTGTTCGTGCCCGTCTGGACAGACTTGATGAGCCGAGCAGCAACAAAGTGCTGCCAATCACCATCCACGGTGACGCCGCAGTGACCGGGCAGGGTGTGGTTCAG +>40 +AAACGCTTCCTCAGCGAACTGACTGCAGCGGAAGGTCTGGAACGCTACCTGGGCGCGAAATTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGTGATGCGTTAATCCCAATGCTCAAAGAGATGATCCGCCACGCCGGTAACAGCGGTACCCGTGAAGTGGTACTGGGTATGGCGCACCGTGGTCGTCTGAACGTCCTGGTTAACGTGCTGGGTAAAAAGCCGCAGGATCTATTCGACGAATTTGCGGGCAAACATAAAGAACACCTCGGTACCGGTGACGTGAAGTACCACATGGGCTTCTCATCGGATATCGAAACCGAAGGCGGTCTGGTGCATCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTTATCGGTTCCGTACGTGCACGCTTGGATCGTCTGGACGAGCCGAGCAGCAATAAAGTGCTGCCAATCACTATTCATGGTGATGCGGCAGTAACCGGGCAAGGCGTGGTTCAG +>41 
+CGTACTTTCCTTGAAGAGCTGACTGCCGCTGAAGGTTTAGAGCGCTATCTTGGTGCGAAATTCCCTGGTGCTAAACGTTTCTCTCTCGAAGGGGGGGATGCCTTAGTTCCGATGACCAAAGAGATGATCCGTCACGCGGGTGCCAGTGGCATGCGTGAAGTGGTGATTGGGATGGCGCACCGCGGTCGCTTGAACATGCTGGTCAACGTTCTGGGTAAAAAACCGCAAGATCTGTTTGATGAGTTTGCCGGTAAACATGGCGAAGGCTGGGGCACAGGTGATGTGAAATATCACCAAGGTTTCTCCGCTGACTTTGCGACACCGGGCGGTGATGTTCACTTAGCACTGGCTTTCAACCCATCGCATCTTGAGATTGTGAACCCTGTTGTGATGGGTTCAGTTCGCGCGCGTCAAGACCGCCTAGGTGATGAAGATGGCAGTAAAGTGCTACCTATCACTATCCATGGTGACTCTGCGATTGCCGGACAAGGTGTGGTGGCT +>42 +AAACGCTTCCTGAGCGAGCTGACCGCAGCCGAAGGCCTTGAGCGCTACCTGGGCGCGAAGTTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGCGACGCGCTGATCCCGATGCTGAAAGAGATGATTCGCCACGCGGGCAACAGCGGCACGCGTGAAGTGGTGCTGGGTATGGCGCACCGCGGTCGTCTTAACGTGCTGGTTAACGTGCTGGGTAAAAAACCGCAGGACCTGTTCGACGAGTTCGCGGGCAAACACAAAGAACACCTTGGCACCGGCGACGTGAAGTACCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGCCTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTAATTGGTTCGGTACGTGCCCGTCTGGATCGGCTGGACGAGCCGAGCAGCAACAAAGTACTGCCGATCACCATTCACGGCGACGCCGCGGTGACCGGTCAGGGCGTGGTTCAG +>43 +GTGCTGGGCCGTAATGGTTCCGACTATTCCGCCGCCGTGCTGGCCGCCTGTTTACGCGCTGACTGCTGTGAAATCTGGACTGACGTCGATGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGACTGCTGAAATCGATGTCCTACCAGGAAGCGATGGAACTCTCTTACTTCGGCGCCAAAGTCCTTCACCCTCGCACCATAACGCCTATCGCCCAGTTCCAGATCCCCTGTCTGATTAAAAATACCGGTAATCCGCAGGCGCCAGGAACGCTGATCGGCGCGTCCAGCGACGATGATAATCTGCCGGTTAAAGGGATCTCTAACCTTAACAACATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGGATGATTGGGATGGCGGCGCGTGTTTTCGCCGCCATGTCTCGCGCCGGGATCTCGGTGGTGCTCATTACCCAGTCCTCCTCTGAGTACAGCATCAGCTTCTGTGTGCCGCAGAGTGACTGC +>44 
+GTGCTGGGGCGTAACGGTTCCGACTATTCCGCTGCGGTACTGGCCGCCTGTTTACGCGCCGACTGTTGCGAAATCTGGACGGACGTTGACGGTGTGTATACCTGCGACCCGCGCCAGGTGCCGGATGCCAGACTGCTGAAGTCAATGTCCTATCAGGAAGCGATGGAACTTTCCTACTTCGGCGCCAAAGTGCTTCACCCGCGTACCATTACTCCCATCGCTCAATTCCAGATCCCATGTCTGATAAAAAATACCGGTAATCCGCAAGCGCCGGGCACGCTGATTGGCGCCAACAGCGATGAAGACGGGCTACCGGTAAAAGGCATCTCGAACCTCAATAATATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGCATGGTCGGGATGGCGGCGCGCGTGTTCGCCACCATGTCGCGTGCCGGGATTTCGGTAGTGCTGATCACCCAATCCTCTTCGGAGTACAGCATCAGCTTCTGCGTGCCGCCAAAGCGATGC +>45 +GTGCTGGGCCGTAACGGCTCCGATTATTCCGCCGCCGTACTGGCCGCCTGTTTACGCGCTGACTGTTGTGAAATCTGGACTGACGTCGACGGCGTGTATACCTGCGACCCGCGTCAGGTGCCAGACGCCAGGCTGCTGAAGTCGATGTCTTATCAGGAAGCAATGGAGCTTTCTTACTTCGGCGCTAAAGTACTACATCCGCGCACTATTACTCCTATTGCCCAGTTCCAGATCCCTTGTCTGATTAAAAATACCGGCAATCCACAAGCGCCCGGTACGCTGATCGGCGCTGCCAGCGACGATGATGCTCTGCCGGTTAAAGGGATTTCTCACCTTAACAACATGGCGATGTTTAGTGTCTCCGGTCCGGGGATGAAAGGCATGGTGGGTATGGCGGCGCGCGTTTTTGCCGCTATGTCACGTGCGGGAATCTCGGTGGTGTTGATCACGCAATCTTCATCTGAATACAGCATCAGCTTCTGCGTGCCGCAGAGCGACTGC +>46 +GTGCTGGGCCGCAACGGTTCTGATTACTCCGCTGCGGTGTTGGCTGCCTGCTTACGCGCCGACTGTTGTGAGATCTGGACTGACGTTGACGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGGTTGCTGAAGTCGATGTCCTATCAGGAGGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTCCTTCATCCTCGCACCATCACCCCCATTGCCCAGTTCCAAATCCCATGCCTGATTAAAAACACCGGAAACCCGCAGGCCCCTGGTACGCTGATCGGCGCCAGCGTGGATGAAGACGAACTGCCGGTGAAAGGGATCTCGAACCTGAACAATATGGCGATGTTCAGCGTTTCCGGCCCAGGAATGAAAGGGATGATCGGGATGGCGGCGCGCGTCTTCGCGGCAATGTCCCGCGCGGGGATCTCCGTGGTGCTGATCACGCAATCCTCTTCTGAATACAGCATCAGTTTCTGCGTACCGCAGGGCGACTGC +>47 
+GTGTTGGGGCGCAATGGCTCTGACTACTCTGCCGCTGTGCTGGCTGCCTGTTTACGCGCGGACTGTTGTGAGATCTGGACCGATGTCGACGGCGTATATACCTGCGATCCGCGCCAGGTACCCGATGCCCGACTGCTGAAGTCGATGTCTTATCAGGAAGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTTCTGCATCCGCGCACCATTACCCCAATTGCCCAGTTCCAGATCCCGTGCCTGATTAAAAATACCGGCAATCCACAAGCGCCTGGCACGTTGATCGGCGCCAGCAGTGATGAAGACGATTTGCCGGTAAAAGGTATTTCTAACCTCAATAACATGGCGATGTTTAGCGTCTCCGGCCCTGGAATGAAAGGCATGGTAGGCATGGCGGCGCGCGTTTTTGCCGCGATGTCGCGTGCGGGCATCTCGGTGGTGCTGATCACGCAGTCTTCTTCTGAATACAGCATCAGCTTCTGCGTTCCGCAGGGCGACTGC +>48 +GTATTAGGTCGCAATGGTTCAGACTACTCAGCTGCAGTATTAGCAGCCTGTTTACGTGCTAAATGCTGTGAAATTTGGACTGATGTTGACGGTGTTTATACTTGTGATCCACGTTTAGTGCCTGATGCACGTTTGTTAAAAGGCATGTCATATCAAGAGGCAATGGAACTGTCTTACTTTGGTGCCAAGGTACTTCATCCTCGTACAATTGCGCCTATTGCCCAATTCCAAATACCTTGTTTAATTAAAAATACGGGCAATCCAGATGCGCCGGGTACCTTGATTGGTGATGGTCAAAAAGATGAGAGCACACCTGTTAAAGGAATAACTAACCTTAATAATATGGCAATGATCAACGTATCTGGGCCTGGAATGAAAGGAATGGTAGGAATGGCGGCTCGCGTGTTCTCGGTAATGTCGAGAGCGGGGATTTCAGTTGTTCTAATCACACAGTCTTCTTCTGAATACAGCATTAGTTTTTGTGTGCCACAAAAAGAGCTG +>49 +GTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGCTGGCTGCCTGTTTACGCGCCGATTGTTGCGAGATTTGGACAGACGTTGACGGGGTCTATACCTGCGACCCGCGTCAGGTGCCCGATGCGAGGTTGTTGAAGTCGATGTCCTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCCCGCACCATTACCCCCATCGCCCAGTTCCAGATCCCTTGCCTGATTAAAAATACCGGAAATCCTCAAGCACCAGGTACGCTCATTGGTGCCAGCCGTGATGAAGACGAATTACCGGTCAAGGGCATTTCCAATCTGAATAACATGGCAATGTTCAGCGTTTCCGGCCCGGGGATGAAAGGAATGGTTGGCATGGCGGCGCGCGTCTTTGCAGCGATGTCACGCGCCCGTATTTCCGTGGTGCTGATTACGCAATCATCTTCCGAATACAGTATCAGTTTCTGCGTTCCACAAAGCGACTGT +>50 
+GTGCTCGGGCGCAACGGCTCCGATTATTCCGCAGCGGTACTGGCAGCGTGTTTACGCGCCGATTGTTGCGAGATCTGGACTGATGTCGATGGTGTCTATACCTGCGACCCACGTCAGGTACCGGATGCCCGATTACTTAAGTCGATGTCGTACCAGGAGGCTATGGAACTCTCCTATTTCGGCGCCAAAGTCCTCCATCCTCGAACCATCACTCCCATCGCCCAGTTCCAGATTCCCTGCCTGATAAAAAATACCGGAAACCCGCAAGCACCAGGAACGCTGATTGGCGCCAGCCGCGACGAAGATGATCTGCCGGTGAAGGGCATTTCAAATCTCAATAATATGGCGATGTTCAGCGTCTCCGGGCCGGGGATGAAGGGAATGGTCGGCATGGCTGCTCGCGTGTTTGCGGCAATGTCTCGCTCAGGAATTTCGGTAGTCCTGATTACGCAATCCTCCTCTGAGTACAGCATTAGCTTCTGTGTACCGCAGGCTGACTGT +>51 +GTGCTGGGGCGTAACGGCTCTGACTACTCCGCCGCCGTGCTGGCGGCCTGCTTACGCGCGGACTGCTGTGAGATCTGGACTGACGTCGACGGCGTTTATACCTGCGATCCGCGCCAGGTACCGGACGCCAGGCTGCTGAAGTCGATGTCGTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCGCGTACCATCTCCCCGATTGCCCAGTTCCAAATCCCTTGCCTGATTAAGAATACCGGTAACCCTCAGGCGCCGGGCACGCTGATTGGCGCCAGCGCGGATGAAGATGAACTGCCGGTGAAAGGCATTTCTAACCTCAATAACATGGCGATGTTCAGCGTCTCCGGCCCGGGGATGAAGGGCATGGTCGGCATGGCGGCACGCGTATTTGCCGCTATGTCCCGCAACGGGATCTCCGTGGTGCTGATCACGCAGTCTTCTTCCGAATACAGCATCAGCTTCTGCGTTCCGCAGGGTGATTGC +>52 +GTATTAGGCCGTAACGGTTCCGACTACTCCGCCGCCGTGCTGGCCGCGTGTTTGCGCGCCGACTGTTGTGAGATCTGGACTGACGTCGACGGCGTCTATACCTGCGACCCGCGCCAGGTGCCGGACGCCAGGCTGCTGAAGTCGATGTCGTATCAGGAAGCCATGGAACTCTCCTACTTCGGCGCTAAAGTTCTCCACCCCCGCACCATTGCCCCCATCGCCCAGTTCCAAATCCCCTGTCTGATCAAAAACACTGGTAACCCGCAAGCGCCAGGCACCCTGATCGGTGCCAGCAGCGATGAAGACGGCCTGCCGGTGAAGGGCATCAGTAACCTGAATAATATGGCGATGTTCAGCGTCTCTGGTCCGGGCATGAAAGGCATGGTGGGAATGGCGGCGCGCGTGTTCGCGGCGATGTCCCGTGCGGGCATCTCGGTGGTGCTGATCACCCAATCGTCTTCTGAATACAGCATCAGCTTCTGCGTGCCGCAGGCCGACAGC diff --git a/locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.ndb b/locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.ndb new file mode 100644 index 0000000000000000000000000000000000000000..dfa7d2e267e27fdcac41d817c56044823912df6b GIT binary patch literal 20480 zcmeI%u}*_f6adgGnn+wQ#(ywwF7Ez{QB0atSk$?{)y4En{D3a@50nD0P>05(Iy5;9 zy!&{M%k5#hB_bVp8+{VEZl~uF`CS|BJbm@Y&P}7mw14ZFqjezx0t5&U 
zAV7cs0RjXF5FoI>z|eR9NB=)$AEK`2tS>!{C;j~AzWslp33bORK$56a26G33lxW~VPj%q;tg;sy%comRr(uEevFOhThS)()6H3s z6>seN|G!V$0yw3B1Wqd;f-?%p;H&~dIH!OV&MP2> z3kt~Lq5^`rq<|zYBhZn=6$K=5RRKv{Q$P~e6_CUY1tf7(0ZD8qAckiNjy+M5)T!S#HIq0c%*}tG zZ^sMi6f4nz(xlPXYgh@Z&oHb@XD#FU)&IfI*Y>wVT&>YxPr&nmV7~6!xPjbe;7A0gSS#c1h+7YcT!^%w=s_o zQs4wev51dSQy0duf=|-XQ@Dd?*pZH%#)SS9@{s-)!w2|D{!^f4PZ_)9Tf|rLTNJwk#BTooH4p9;?y@ZRlXt;Imt1zmfvc{$?uMIgx$TZacinU3 wz6Tz9Z*opT}eelsIpMCMwH{bp6(=WgMapr&Q3tcn?%>V!Z literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.nsq b/locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.nsq new file mode 100644 index 0000000000000000000000000000000000000000..7fc5e3b8e6fafd8419a1bdda10966df4ce2396be GIT binary patch literal 6297 zcmZ9Mk3W-p|Hn6KrJ`R5o!jXgnzNjv&Ns@A(o}>_DwShXLu_kiQ(xyW9GXe`!NxHA zk)0e|rLs<^qAApUQm1oQ;>2k>gtg6b7dlbPee``_*Vg%-$Ndjn&pn^7_h&fU?yf3w z)y>@g_lZg&z0+@j-e8eXQ193s( zCt37>1RIlK%0=ZTQ{iTk#0hMf?Y1WEO+e(QDM92EsP-xy!p*!m9%CkAXH}HyS zDnQM^k(N%2$!BfaKI=wncT_Do{_gI*p>sGHGN5x(vKc%)LRK0R+F#`;{hW>R`#{;VsepQ=fwXjqR%F{|vn;5!`(c&qz}?-0p>s?bvI9KDzLYIY zjBWrrxM%LeO(^@}j`)3MXmc}(7TY2#76IH1`;o{kwn#>E-ZN}=i{q!fggrAM#JEh<0j{B0I+gw}{cx7nn>c1CW_g;XD z7TIpXEyII*Eji)l?l{;zL-q$#k;KYoFkyEkBB>J><5s3c3#Lj3*D5lgyn11GCX}z2 zG<;cg_rwF8hVG7_ba@}p#h5$LiF!sIxC4EN zQHaskxbkKxPXZl26=bl`gWYL2zVN7*D{meAS{kvGRCa(zV+vlsxu;jqY0H>L%qg3P zNJZ5uO|vOknyr0)DLVl^kLcd6dLpT>qqVSrZth^ZcYgrBIRF1%O?UIE04OnSn1nI zhcJ2wxc`klU3ePkJd|F6fIGdXGe&=BD+hH~Tg$TxE#+<1B}zl)D{08|WwQ8`NG1i{ zy{V5rizzVk!iKz$!tSX1J1CzKhQRJ=7WXl5KkQHJ!*;(9^yU*|*zVJXSC(Xy03CL( z0s6ML51#1lfd04L*@yhF-EE8D>iAzvLtN`*+t1TNuD^Z*?kd_WmO$$RyXThz9d+;O z4KY&<1zovVdAPd2=pmbXBT%}f{3D=$oMCZqL|U68qTeQT=Vmwu-KPv%iw!@9}5YY!4NxOSIe$xI?yRovV3!jW_SQsllP#(6je>&y{Z-=uku zb{ezgpnk(f)fP{WWQX$jw%hI6tyLw#C8iT!#@JOY5vMyzicZ`A4MyRy3dfo@APs~2 z-&Q*fAWY5RrGqFeL2keGJ*-QncHyL4i{uDKBsf_LYSwCw?eJtY#OgzK{v+(jaGZOE zk|A%`it9^4I(zfGD*^Wivt`r9MDiua 
z66B`&TI9yzoXKFKOk}oEutE;G^j(FH(=0UiAkXhJ-77THU2ki*)LA>;sFt;}{Gj9C z)o^=R?PF(x5Trlr7m8EI5~jTWQmnP(Ru_|ECPG$~^eT8R2_u&x*#cuRPO!;T=+c*N z{;+r2)2mw<5s589o@e&EmvxMnwrksKFSJ!^=^LTr3Ng!X-g}sH-p$$QXq(#2iW`Wu z|4mP)>wqs$6Mqfsn5Yp#N5xDpvP#mcNS+sy@&j}P825=pC#1*Au8&Am1$pYP zsVYKtQQJ|o5V`5wFR4dQJ{n1Jy7nFh7cxDNX2WD{H7kPD(zuLgaju3qru?HTDKCzn z=wxXGoBYAd6U@pL?Q>Y_OXz6xaDhqWQ1A0umD24ySR&g!_u0i+O}ud1gM=QooFtZ} z*G@?GcDD(dR?M9$XA@%o?QFN6>m3%>7>}^;73!DPar-sHMbZWA;^68D?dyEwH{tw} z+I{2UIy#?Gty-v0^hS17O$2~%UfF+F_W z{Mvo5!+)jocU4O-8bTK*9*c=%EGHgsQ%dj%m$Zf$8N+{k*yh8fC_Ow2qjzmjWr%G1 z?q^8S70KZNt%SFAC3fvnR?U>8zgsWB=p2XG7tVGRuJ`e<#wvumrBLr#kGhM&y^K*k zVNNf&ofaNFzczY&J?1XGSiAJBQMh>t!|SPB@2bKx*`#Zjdob%O!v-ZjFbSn+RXS4> z^dgVgTWoND5x%$;+)Kcng-l&og3;|KYP|`u;Ev;Z9|gLaT~w$~tmU2rx(dnz-Sk?F zmIwFraNU3S43=so!c{I6X2mhQYI5~56`sK$xf3F({Ko}Y`P@L-WT>|~z4Ev12@H`3 z{T5q@mQP5vdB%a=`@TXZazA}I{?CSa_EY6`4!e2IBq3J53So<({9o=TV0Tk`z7gC@ zYWZmx-9x&t2Cu9;95cxEI+d?iDA4jJx`94f%UnIMxin}M&EhUa-EH^Wq86t&@oZ+d z5}uhX?wKXu|2~duSYba^TSwRp<=12M0fcQ|sAp7h<1P;0L$8ieJ*E97-}oA(hXNh0 zuIhxB{+O~>7}L-2VjO)=mCk2!NtNA^F{6)(Hmi}%%G_1f@>cqNwsKN&F3dYM{29BF zv_%2sOZGaqQJUsK`8o%;tnL7Q+t>* z$eqDJZyxW(AgL5K;EpiSo6jY>pK^SIVbi$jg6zfUe8GdXKHrun8VHFhTHex}@r z$Zh+|b|w%um4;Pl3yAIoR)%e#2pB*6v{HA(?a~2bBATF>nOp74d(V(!j6e3&AJksAqmfo%@vF(+8onYfNKddqD6vk+k zM%z!Uq!Y&HqYUbNVA!D^zz|wIROlX-R%c+}X{DdL#Q z+t8{{wlTqGkvymodG7%XTh(FM@{qpk?~k5Dv+MinE46{!y% z&m&X3{jxq;bO%~3Q*ASR*G{aQ{U7Dc*Ywo?h_)7#TK9;ypMpKcOG4mwsq>R}RbiWB zd$_25%^Zayu>|!%H-}d0aUMm^995{{u700{R-Cn294B-SZv{nPZmgAi&}E4_n{_2& zjmU9~0q?_drasm)U1Uvv8O~I1c?vx2Of4q_S#+*ok2IbSHmQ`OJGHA%1`b9w>FT-- z4DL{Lri-)Z57#yNq7PoN4^!37Wpz!jaxHXKC`PZtjb}3PtrMBE*rJrV(MHOVVQNHD z5V3?R9Bw&jD&aNI3AgJBMxb{VjSN`mTb~63y=~>$*WBt=_&B7%LXXleI=QMd>POv~ zK0_k+5iJeq#gwahd&gJ9qOj%8H5U4}MAV&>Ll{ew13eM{U0UWW&fygMXyf%GI%-I! 
zh29J9Jk(u{(TfYfU5mO0_k^ey3))ckf$@(LkP0>C9@V>OFz}yIKmNh#o6vKILUa73 zK-U2MA?GmF$GPS#=DvqGqd>CHJCkrAQ!W@!Vp`~;6#H%94s_ZL;)NLDu=_a+{kQWd zeW|3VJwDG|htb>A;12XHn0upo3tAp?e;L*E2dDeiQ`XSqOc$U-dHWB*o&BkE4GHMU z2V;d^1ya8EOJZt{Mlhaf#J8rW%{n8BT^>CE^q7z&y-%r9I4s6?&zWYdC%lI8#YH1J zwEP!wr_O9t2MF#tdcUR)OvO7RS4KA7asTcC>^>OS|5c0bZ07`%JCpjqSNcVg>zcNE zA3Pi`$Ph~pc{iDE!0tq^d`UDZhY0S%_wMY&tfFYRI-NEI-5uyEaA!6=As8|D>5_c3 zd>ww?lHj-4?tfif#ou-Z_n(G)ZbA7T%$;&UU*u@Hx=ZNpnrAT%&GGG2hQ`Dgxy33_ z^{=iPX!wG6mp(u(Q`1~SGx}Er?2Z&BoJ{{V8`<_*wv*QJM#ZX~&7oac{83xyAG*UC zpSbGS%E)-piay%tveo-zr!7PRUUIw3Py5dj1y&>StA9#&G%{&LJ;Rl2c3%FCf5Ede z`-NWO;~u1WrX-+r!v&PiC}@dWT~*cM_Tnx*9;K6kekovgC(sY>2D(qSlhSdlYt`3l zL%Ra_*HQXJ-RbvaA4)gq z%j`rzk0DehfICKK6|`XPZr~nIy`+wG4bSLr2>7;BxZ(SQ;J*LU1Ag)k#(oP#-6v7^ z;)m-Q9B@~GI~jFf_qE0SdH3Zr{<9*QE&~0XH1!fM$-ZZ}#W!*Ful&z3x`c?jQwhzw z+f>6ZU*}9&SMD}9m@M@9Xoabozg|-kK&yLQQ^CWfnKJp7P`38 z`j@SaqfZ0(&Xn%*PoVrN)ID3V;W9I+sC!uDo9OohrT=MYlOXOvSD(opDhF<7urwz9 z$So#DPiSyUzDrM_mYs_tM_b*k^yL|Ta>ucqn7ih^yJ<1dB|4$Y6z#hAy8V0RSa%-+ z_a93!_k;#s(v}{S-iXn&4dW>N;xAf<=8tZNJ-v8m>Budkt*U=rm9E8Y4rs@aH5Yl(nmj8 zu|Ias${PO%$$hmzcla?6sZ=)i{M7JqlDKp7ci)W|{pGtC`gt~9xth1GaCNsc446AZ z+_HCF)kiIE^_JbE$j2tp@^JItqgS_odA1YPacmcM^Jdgt@Z0)eZ`WUk%cS7GhW4Ot z-L}3t3v00Qu)9MG>V8m?aETe!4&~P*`aX&N{8K1DK9A^rL(?P2u8w%YM1-3M`sylO z!xztizWvfU8aaAwv>_ll@>uH0^xc2Li~Gj!kz+>}N*~WaZ(b0!J{Vq{sZ1I#^zWnf z*R7-UqBq~k?XErJzs#Yb^i-v?Rk5Lw6+XA;U-UJJe&~yPnf;pcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.nto b/locidex/example/manifest_in/fails/fails_author/blast/nucleotide/nucleotide.nto new file mode 100644 index 0000000000000000000000000000000000000000..ad19396e81aff427697a109c3c035ac73cb27f3f GIT binary patch 
literal 216 zcmXBFg${xM06;;oyHK$M6YTo`U-LHJc6+}dFSG(hN|dQkrAD0wO0 +KFRPGHADYTYHQKYGVRDYRGGGRSSARETAMRVAAGAIAKKYLQQEFGIEVRAYLSQMGDVAIDKVDWNEIENNDFFCPDVDKVAAFDELIRELKKEGDSIGAKIQVVATGVPVGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVRQKGSQHRDPLTPQG +>1 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKHGIVIQGCLTQMGDIPLEIKDWQQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVANGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>2 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGVEIRGCLTQMGDIPLEIKDWSQVELNPFFCPDPDKIEVLDELMRGLKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>3 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLKEKLGIEVRGYLSQLGPITCDLVDWSIVESNPFFCPDPSRLDALDEYMRALKKEGNSIGAKVTVVAQGVPAGFGEPVFDRLDADLAHALMSINAVKGIEIGDGFGVVTLKGTENRDEITKKG +>4 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGIEIRGCLTQMGDIPLEIKDWSQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKDG +>5 +VFRPGHADYTYEQKYGFRDYRGGGRSSARETAMRVAAGAIAKKYLQQKFGIVIRGCLSQMGDIPLAIKDWDQVELNPFFCADADKLDALDELMRGLKKEGDSIGAKVTVVADGVPAGWGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVKLRGSQNRDEITKAG +>6 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAQKFGVVIRGCLTQMGDIPLEIKDWDQVEQNPFFCPDPDKIEALDELMRALKKEGDSIGAKVTVVADSVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFGVVQLRGSQNRDEITTAG +>7 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>8 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>9 +MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>10 +MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQPLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>11 
+MEMVARVALVQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGERMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGQSLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>12 +MEMIARVTLTQPHDAGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGDSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>13 +MEMIARVALSLPHQAGATTVPARKFFDICRGLPEGAEIAVTLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCAMPVGQPLPNHSVIVPRKGVLELMRMLDGGDSPLRIQ +>14 +SALTENDLVFALSQHAVTFADAELQQQGKSWPSLPRYFAIGRTTALALHTVSGFNIHYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELIGETLTARGADVDFCECYQRSAKYYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>15 +AALGESDLLFALSQHAVAFAQSQLHQQDRKWPRLPTYFAIGRTTALALHTVSGQKILYPQDREISEVLLQLPELQNIAGKRALILRGNGGRELIGDTLTARGAEVTFCECYQRCAIHYDGAEEAMRWQSREVTTVVVTSGEMLQ +>16 +ATLTENDLVFALSQHAVAFAHAQLQRDGRNWPASPRYFAIGRTTALALHTVSGFDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRGRELLGETLTARGAEVSFCECYQRSAKHYDGAEEAMRWHTRGVTTLVVTSGEMLQ +>17 +AALTDNDLVFALSQHAVAFAHAQLQQQELDWPVQPRYFAIGRTTALALHTVNGCDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELLGKTLTERGAEVTFCECYQRSAKHYDGAEEAMRWHSRGVTTIVVTSGEMLQ +>18 +ETLGDNDLLFALSQHAVSFAHAQLQQQGLNWPSLPHYFAIGRTTALALHTVSGHKIRYPQDREISEVLLQLPELQSIAGKRALILRGNGGRELIGQTLTSRGADVTFCECYQRSAKHYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>19 +RLLQEGDLLFALSQHAVEFAHAQLQQHAVSWPHAPRYFAIGRTTALALHTASGIDVRYPLDREISEVLLQLPELQTIAGKRALILRGNGGRELLGETLRERGADVTFVECYQRCAKHYDGAEEAMRWHARGINTLVVTSGEMLQ +>20 +IAGCQKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADIARKVAEAVERQLAELPRAGTARQALSASRLIVTKDLAQCV +>21 +IAGCKKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGIAQNVAEAVERQLAELPRAETARQALSASRLIVTKDLAQCV +>22 +IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGMASRVAEAVERQLAALPRAETARQALSASRLIVTRSLAQCV +>23 
+IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGTESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPAADMARRVAEAVERQLAELPRAETARQALNASRLIVTKDLAQCV +>24 +IAGCQKVVLCSPPPIADEILYAAKLCGVQAIYKVGGAQAISALAFGTVSIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADMAKRVGDAVERQLADLPRAETARQALSASRLIVARDLDQCI +>25 +IAGCKKVVLCSPPPIADEILYAAQLCGVKEVFNVGGAQAIAALALGTESIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAKLAEGVAEAVERQLAELSRADTARQALSASRLIVAKDLAQCV +>26 +IAGCKKVVLCSPPPIADEILYAARLCGVQQVYQVGGAQAIAALAFGTETVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATTDFVASDLLSQAEHGPDSQVILLTPDSAMAQAVADAVERQLAELPRAETARQALAESRLIVARDLAQCV +>27 +SDWATMQFAAEIFDILDIPHHVEVVSAHRTPDKLFSFAENAEENGFQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLRD +>28 +SDWTTMQFAAEIFEILDVPHHVEVVSAHRTPDKLFSFAETAEENGYHVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLSN +>29 +SDWATMQFAAEILDILNVPHHVEVVSAHRTPDKLFSFAEDAESNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>30 +SDWATMQFAVEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKELHQRLNG +>31 +SDWATMQFAAEIFDILNVPHHVEVVSAHRTPDKLFSFAESAEEKGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLRQRLAD +>32 +SDWATMQFAAEIFEMLDVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKALHQRLSD +>33 +SDWATMSHAADVLDTLQIPYHVEIVSAHRTPDKLFSFAEKAKSNGFDVIIAGAGGAAHLPGMLAAKTLVPVFGVPVQSATLSGVDSLYSIVQMPKGIPVGTLAIGKAGAANAALLAAQVLALHSPAILDALTA +>34 +SDWATMQFAAEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYEVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLHQRLAE +>35 +SDWATMQFAAETAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>36 
+SDWATMQHAAEILDALDVPYHVEVVSAHRTPDKLFSFAESAQHNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDDALLARLAA +>37 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLINVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>38 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>39 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDFQTDGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>40 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>41 +RTFLEELTAAEGLERYLGAKFPGAKRFSLEGGDALVPMTKEMIRHAGASGMREVVIGMAHRGRLNMLVNVLGKKPQDLFDEFAGKHGEGWGTGDVKYHQGFSADFATPGGDVHLALAFNPSHLEIVNPVVMGSVRARQDRLGDEDGSKVLPITIHGDSAIAGQGVVA +>42 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>43 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDDDNLPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>44 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGANSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFATMSRAGISVVLITQSSSEYSISFCVPPKRC +>45 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGAASDDDALPVKGISHLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>46 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASVDEDELPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>47 
+VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>48 +VLGRNGSDYSAAVLAACLRAKCCEIWTDVDGVYTCDPRLVPDARLLKGMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPDAPGTLIGDGQKDESTPVKGITNLNNMAMINVSGPGMKGMVGMAARVFSVMSRAGISVVLITQSSSEYSISFCVPQKEL +>49 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDC +>50 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRSGISVVLITQSSSEYSISFCVPQADC +>51 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTISPIAQFQIPCLIKNTGNPQAPGTLIGASADEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRNGISVVLITQSSSEYSISFCVPQGDC +>52 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQADS diff --git a/locidex/example/manifest_in/fails/fails_author/config.json b/locidex/example/manifest_in/fails/fails_author/config.json new file mode 100644 index 0000000..5464d0e --- /dev/null +++ b/locidex/example/manifest_in/fails/fails_author/config.json @@ -0,0 +1,12 @@ +{ + "db_name": "Locidex Database", + "db_version": "1.0.0", + "db_date": "04/04/2024", + "db_author": "", + "db_desc": "", + "db_num_seqs": 53, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" +} \ No newline at end of file diff --git a/locidex/example/manifest_in/fails/fails_author/meta.json b/locidex/example/manifest_in/fails/fails_author/meta.json new file mode 100644 index 0000000..f3b88fa --- /dev/null +++ b/locidex/example/manifest_in/fails/fails_author/meta.json @@ -0,0 +1,1181 @@ +{ + "info": { + "num_seqs": 53, + "is_cds": "True", + "trans_table": 11, + "dna_min_len": 220, + "dna_max_len": 350, + 
"dna_min_ident": 80, + "aa_min_len": 73, + "aa_max_len": 116, + "aa_min_ident": 80 + }, + "meta": { + "0": { + "seq_id": 0, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 609, + "dna_seq_len": 501, + "dna_seq_hash": "4811bc98591c74954ace3cb487330482", + "aa_seq_len": 167, + "aa_seq_hash": "a8fbcf8179d8548f980b7b15f29de1d4", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "1": { + "seq_id": 1, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 614, + "dna_seq_len": 501, + "dna_seq_hash": "b66979eaf680fab872ffe1bde4c092d6", + "aa_seq_len": 167, + "aa_seq_hash": "3e034a4d80ac27352822774abd9319df", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "2": { + "seq_id": 2, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 618, + "dna_seq_len": 501, + "dna_seq_hash": "f02a36ff6df05f9bf38428fa22a035da", + "aa_seq_len": 167, + "aa_seq_hash": "e2d30bb18231528ef65c34880704dd7a", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "3": { + "seq_id": 3, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 619, + "dna_seq_len": 501, + "dna_seq_hash": 
"bee9d7360aa8e9b840fb29afa1de2c2e", + "aa_seq_len": 167, + "aa_seq_hash": "c3f71f5780b5f1031aaf21697a482ee3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "4": { + "seq_id": 4, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 620, + "dna_seq_len": 501, + "dna_seq_hash": "5b7956485455fdbc7c86d4834a8f7406", + "aa_seq_len": 167, + "aa_seq_hash": "60ce8f3b07f53378580ee528910ee623", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "5": { + "seq_id": 5, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 624, + "dna_seq_len": 501, + "dna_seq_hash": "98ba14aac74444a253123aff3d20c69f", + "aa_seq_len": 167, + "aa_seq_hash": "bab41702c7c209def93f9c9930c27086", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "6": { + "seq_id": 6, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 716, + "dna_seq_len": 501, + "dna_seq_hash": "6b9166d5d996897cae3cc288d7969d78", + "aa_seq_len": 167, + "aa_seq_hash": "5bc86c0a9226224922cbd6219c182622", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + 
"locus_type": "mlst" + }, + "7": { + "seq_id": 7, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "d401763f2df6e5fe87e1e07d3c170fe6", + "aa_seq_len": 167, + "aa_seq_hash": "928ad814483bbffda3e3b3a0aa4ca072", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "8": { + "seq_id": 8, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 120, + "dna_seq_len": 501, + "dna_seq_hash": "9c50d73cc4ef8d0a447f07ad150ad8cc", + "aa_seq_len": 167, + "aa_seq_hash": "928ad814483bbffda3e3b3a0aa4ca072", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "9": { + "seq_id": 9, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 555, + "dna_seq_len": 501, + "dna_seq_hash": "fab4f658dfba0cd0174a4a87998cf948", + "aa_seq_len": 167, + "aa_seq_hash": "a081905e659429db1f40e145932ae277", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "10": { + "seq_id": 10, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 557, + "dna_seq_len": 501, + "dna_seq_hash": "acb2ed027124e2a54b7734cd538590f1", + 
"aa_seq_len": 167, + "aa_seq_hash": "970184ec5ccc9f02ee3c858d2687cc18", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "11": { + "seq_id": 11, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 558, + "dna_seq_len": 501, + "dna_seq_hash": "ad996a122298d55ab3d4b2ea7a4974b0", + "aa_seq_len": 167, + "aa_seq_hash": "945455021fffea9b793d16af630db961", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "12": { + "seq_id": 12, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 563, + "dna_seq_len": 501, + "dna_seq_hash": "815242e67f31f4e2968f7f0620565125", + "aa_seq_len": 167, + "aa_seq_hash": "1b117ca76a022ae63d6f7bfe2ead289e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "13": { + "seq_id": 13, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 633, + "dna_seq_len": 501, + "dna_seq_hash": "532742ae95c046241789d79e68e30b7a", + "aa_seq_len": 167, + "aa_seq_hash": "fff51d2396f3da88a775416b4c6d14b6", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + 
"locus_type": "mlst" + }, + "14": { + "seq_id": 14, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 316, + "dna_seq_len": 432, + "dna_seq_hash": "3922f6256f2891400db415013eb0b208", + "aa_seq_len": 144, + "aa_seq_hash": "0af9d546dfcaf93373a8919df3e30323", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "15": { + "seq_id": 15, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 343, + "dna_seq_len": 432, + "dna_seq_hash": "f76c13e33ad5b502dfe64181dbdf2378", + "aa_seq_len": 144, + "aa_seq_hash": "32484f065f9013aaa5b3c694cc99cdbf", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "16": { + "seq_id": 16, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 472, + "dna_seq_len": 438, + "dna_seq_hash": "80bea3abd165ee14e51bc9e9779fc6a1", + "aa_seq_len": 146, + "aa_seq_hash": "4e9cc2d289f1c946738cc8e6e4ef1186", + "dna_min_len": 306, + "dna_max_len": 744, + "aa_min_len": 102, + "aa_max_len": 248, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "17": { + "seq_id": 17, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 489, + "dna_seq_len": 432, + "dna_seq_hash": "83a314185d9ff0bf7c2953d30979e7eb", + 
"aa_seq_len": 144, + "aa_seq_hash": "5f9fc3707789543f2f14b0f1a555a05c", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "18": { + "seq_id": 18, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 497, + "dna_seq_len": 432, + "dna_seq_hash": "c70622b317de74bdaf57eb8bb5134537", + "aa_seq_len": 144, + "aa_seq_hash": "56b3d46d3e517eb7f83f089f9ed5ae2a", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "19": { + "seq_id": 19, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 498, + "dna_seq_len": 432, + "dna_seq_hash": "f284b11b34de688e2ef54c1b73936595", + "aa_seq_len": 144, + "aa_seq_hash": "da558cdebd900031d0df8f58ef01454e", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "20": { + "seq_id": 20, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "9f762c246c542c52c94c5022ca62311c", + "aa_seq_len": 167, + "aa_seq_hash": "447381a0d286fa1037b5499e2242819a", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": 
"mlst" + }, + "21": { + "seq_id": 21, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 754, + "dna_seq_len": 501, + "dna_seq_hash": "65b434bea0d1939d2b748dbc5dd6df8b", + "aa_seq_len": 167, + "aa_seq_hash": "2b685aa7892794b69c9faa20c58a9183", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "22": { + "seq_id": 22, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 757, + "dna_seq_len": 501, + "dna_seq_hash": "eccfc35078428e44e5dd3e85d9ebf1fe", + "aa_seq_len": 167, + "aa_seq_hash": "35fa89ee4cd8689b89d553157471afe0", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "23": { + "seq_id": 23, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 759, + "dna_seq_len": 501, + "dna_seq_hash": "ce01d780cd0ffe3197f708d7048a473b", + "aa_seq_len": 167, + "aa_seq_hash": "bc0edd26ea6032cc4939e8cbc17a12d3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "24": { + "seq_id": 24, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 768, + "dna_seq_len": 501, + "dna_seq_hash": "23377e95fe00bf6a16b51fe8929a938a", + "aa_seq_len": 167, + "aa_seq_hash": 
"9fb34628ef67396ed38c755280e04f7e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "25": { + "seq_id": 25, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 838, + "dna_seq_len": 501, + "dna_seq_hash": "8478cdd016753651cd73afc4ad20c7df", + "aa_seq_len": 167, + "aa_seq_hash": "6512669779521a6792ecdae3088467f7", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "26": { + "seq_id": 26, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 907, + "dna_seq_len": 501, + "dna_seq_hash": "ab935d39fffeff601d95a8362ba454f3", + "aa_seq_len": 167, + "aa_seq_hash": "1c277aef51e883e29ee8b489c525ea1b", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "27": { + "seq_id": 27, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 24, + "dna_seq_len": 399, + "dna_seq_hash": "a7af783dc7084f1b8bc593aa29f80003", + "aa_seq_len": 133, + "aa_seq_hash": "46a0c532edb92303b1b9d12a80056a60", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + 
"28": { + "seq_id": 28, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 48, + "dna_seq_len": 399, + "dna_seq_hash": "9fb313e6232b0d0e14d2fc4be7c409f7", + "aa_seq_len": 133, + "aa_seq_hash": "0e56efdd1f7fbaf132524616e29d98ca", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "29": { + "seq_id": 29, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 317, + "dna_seq_len": 399, + "dna_seq_hash": "50cd750e2f6860dd489040f1d5f64f9b", + "aa_seq_len": 133, + "aa_seq_hash": "18e887a66ce56a930dbf8db48b406596", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "30": { + "seq_id": 30, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 487, + "dna_seq_len": 399, + "dna_seq_hash": "0e1384e36f3897f65690f9230d2bcd73", + "aa_seq_len": 133, + "aa_seq_hash": "20c9a488aa6542257a151ced866d2f8f", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "31": { + "seq_id": 31, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 608, 
+ "dna_seq_len": 399, + "dna_seq_hash": "e180fd1852382c132851674a9e379c03", + "aa_seq_len": 133, + "aa_seq_hash": "c7da76b50946241fe125348a19a9b6a3", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "32": { + "seq_id": 32, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 611, + "dna_seq_len": 399, + "dna_seq_hash": "0ec842f985e93041c928ab7bb137295d", + "aa_seq_len": 133, + "aa_seq_hash": "be3990f2abaa8780b14e62d4fc8cd82a", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "33": { + "seq_id": 33, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 612, + "dna_seq_len": 399, + "dna_seq_hash": "9d42e484ea2936f87312f07abf0ad84a", + "aa_seq_len": 133, + "aa_seq_hash": "7af624e3930c7a5ab7785b08d925081c", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "34": { + "seq_id": 34, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 619, + "dna_seq_len": 399, + "dna_seq_hash": "02949c6f858f3cc5de1b13c9f5a40705", + "aa_seq_len": 133, + "aa_seq_hash": "52d120d4090a22e450633e01e4ccb729", + "dna_min_len": 279, + "dna_max_len": 678, + 
"aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "35": { + "seq_id": 35, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 631, + "dna_seq_len": 315, + "dna_seq_hash": "c4715d7df9a9eebfe5a334dd55ee469b", + "aa_seq_len": 105, + "aa_seq_hash": "31aa38918b303bf67374188e11413e59", + "dna_min_len": 220, + "dna_max_len": 535, + "aa_min_len": 73, + "aa_max_len": 178, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "36": { + "seq_id": 36, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 724, + "dna_seq_len": 399, + "dna_seq_hash": "782d08e7ee8a031a1402020e708bfbbc", + "aa_seq_len": 133, + "aa_seq_hash": "b5f9063808b8be839e7f169bf73c88e4", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "37": { + "seq_id": 37, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "481b6454f33fae7875b4978c14094ec3", + "aa_seq_len": 167, + "aa_seq_hash": "fa04457773c66ae015014e915af2516d", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "38": { + "seq_id": 38, + 
"locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 30, + "dna_seq_len": 501, + "dna_seq_hash": "79048d21794195277a6af839be13e6e1", + "aa_seq_len": 167, + "aa_seq_hash": "186c53cb5c2bf0b7ecac853c6067065d", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "39": { + "seq_id": 39, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 281, + "dna_seq_len": 501, + "dna_seq_hash": "f10d273aa97d5556a43b96721d666975", + "aa_seq_len": 167, + "aa_seq_hash": "4172d5e8c8265884fe5479e10527cb02", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "40": { + "seq_id": 40, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 399, + "dna_seq_len": 501, + "dna_seq_hash": "1839775cc7c29412648ec7b004e1a417", + "aa_seq_len": 167, + "aa_seq_hash": "c4cfbbf5c5814829188f4f404f312bd3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "41": { + "seq_id": 41, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 571, + "dna_seq_len": 501, + "dna_seq_hash": "fce3e68952108e415579b3ad24a3f150", + 
"aa_seq_len": 167, + "aa_seq_hash": "43372b6526524f5ed4542be83b5b8614", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "42": { + "seq_id": 42, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 686, + "dna_seq_len": 501, + "dna_seq_hash": "629ea0cbfe0d2e9f34b1ca034a6c55fd", + "aa_seq_len": 167, + "aa_seq_hash": "c4cfbbf5c5814829188f4f404f312bd3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "43": { + "seq_id": 43, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "eaec644b411bd0b3ab1e086fbabd29c9", + "aa_seq_len": 167, + "aa_seq_hash": "bfe756f2f421db752907a171f3a44d69", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "44": { + "seq_id": 44, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 31, + "dna_seq_len": 501, + "dna_seq_hash": "97e4acce4e840b1c48de51f55fccf620", + "aa_seq_len": 167, + "aa_seq_hash": "be9296cb1ea9443fb43c0f967d107988", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, 
+ "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "45": { + "seq_id": 45, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 208, + "dna_seq_len": 501, + "dna_seq_hash": "fbc6cb34cddfb1fe6a7806d5f7613259", + "aa_seq_len": 167, + "aa_seq_hash": "b788ec581475c9ba71d997b2db6e1def", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "46": { + "seq_id": 46, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 630, + "dna_seq_len": 501, + "dna_seq_hash": "ce58c0cacd4e8d9fa4867d11f2add864", + "aa_seq_len": 167, + "aa_seq_hash": "c062c5c88bdebdf2883e06fe6823c71c", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "47": { + "seq_id": 47, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 631, + "dna_seq_len": 501, + "dna_seq_hash": "949426df5430f94547459d06c786d77b", + "aa_seq_len": 167, + "aa_seq_hash": "dac50e2b5df83fe87c9826ecf99d568e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "48": { + "seq_id": 48, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate 
kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 632, + "dna_seq_len": 501, + "dna_seq_hash": "9a187a6b3e4675fe12ea213c7a23577c", + "aa_seq_len": 167, + "aa_seq_hash": "6536824faaa7880cfb44a6cd1ed057c9", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "49": { + "seq_id": 49, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 633, + "dna_seq_len": 501, + "dna_seq_hash": "7be8b9732228c1f82630b547d7011a5e", + "aa_seq_len": 167, + "aa_seq_hash": "1eac2cb94b8f619df1c9b0f3369f4a96", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "50": { + "seq_id": 50, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 637, + "dna_seq_len": 501, + "dna_seq_hash": "1895acdf991b49a885873fe82ce9ca85", + "aa_seq_len": 167, + "aa_seq_hash": "9fe9521d0bf495570a0fd425c0e48764", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "51": { + "seq_id": 51, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 638, + "dna_seq_len": 501, + "dna_seq_hash": "9776bbec78b5214d3dfca0d32b395d4b", + "aa_seq_len": 167, + "aa_seq_hash": 
"2914d167cc3579348e36d16afc628a39", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "52": { + "seq_id": 52, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 748, + "dna_seq_len": 501, + "dna_seq_hash": "6cf9d69644c819d9ecd3a0fd090977fc", + "aa_seq_len": 167, + "aa_seq_hash": "cf0168a601a4f5792c7326a2da650edb", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + } + } +} \ No newline at end of file diff --git a/locidex/example/manifest_in/fails/fails_author/results.json b/locidex/example/manifest_in/fails/fails_author/results.json new file mode 100644 index 0000000..5252454 --- /dev/null +++ b/locidex/example/manifest_in/fails/fails_author/results.json @@ -0,0 +1,14 @@ +{ + "analysis_start_time": "2024-04-04 14:12:12", + "parameters": { + "input_file": "locidex/example/build_db_mlst_in/senterica.mlst.txt", + "outdir": "/tmp/pytest-of-mwells/pytest-82/build0", + "name": "Locidex Database", + "db_ver": "1.0.0", + "db_desc": "", + "author": "", + "date": "", + "force": true + }, + "analysis_end_time": "2024-04-04 14:12:12" +} \ No newline at end of file diff --git a/locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.fasta b/locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.fasta new file mode 100644 index 0000000..a03cb89 --- /dev/null +++ b/locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.fasta @@ -0,0 +1,106 @@ +>0 
+AAATTCCGTCCCGGACATGCGGACTACACCTATCACCAAAAATACGGTGTGCGAGATTACCGTGGCGGCGGCCGTTCATCGGCACGTGAAACCGCCATGCGTGTTGCTGCGGGAGCGATTGCCAAAAAATATCTGCAGCAAGAGTTTGGCATTGAAGTGCGTGCTTACTTGTCGCAAATGGGGGATGTCGCGATTGATAAAGTGGATTGGAATGAGATTGAAAACAACGATTTCTTCTGTCCTGATGTCGATAAAGTGGCTGCGTTTGACGAGCTGATCCGCGAGCTGAAAAAAGAAGGCGATTCGATCGGCGCGAAAATCCAAGTGGTCGCTACAGGCGTGCCGGTTGGACTGGGTGAGCCTGTGTTTGATCGCTTAGATGCGGATATTGCCCATGCCTTGATGAGCATCAACGCCGTGAAAGGAGTCGAGATTGGTGATGGCTTTGATGTGGTGCGCCAAAAAGGCAGCCAACACCGTGACCCGCTCACTCCACAAGGT +>1 +GTTTTCCGCCCGGGCCATGCCGACTATACCTACGAGCAGAAATACGGTCTGCGCGATTACCGTGGCGGCGGTCGTTCTTCCGCCCGTGAAACGGCGATGCGCGTCGCGGCTGGCGCGATTGCTAAAAAATATCTGGCGGAGAAACACGGCATCGTCATTCAGGGGTGTCTGACCCAGATGGGCGATATTCCGCTTGAAATCAAAGACTGGCAGCAGGTTGAACAAAACCCGTTTTTCTGTCCTGATCCAGATAAAATCGACGCGCTGGATGAACTGATGCGCGCCCTGAAGAAAGAGGGCGATTCGATTGGGGCAAAAGTGACCGTCGTGGCAAACGGCGTTCCGGCCGGGCTTGGCGAACCGGTCTTTGACCGTCTGGATGCGGACATCGCTCATGCGCTGATGAGCATCAACGCGGTAAAAGGCGTGGAGATTGGCGATGGGTTTGATGTGGTCGCGTTGCGAGGCAGCCAGAATCGCGATGAAATTACCAAAGAGGGC +>2 +GTTTTCCGTCCAGGACACGCTGACTATACCTATGAGCAGAAATATGGCCTGCGCGACTACCGTGGCGGCGGACGTTCATCCGCGCGTGAAACGGCGATGCGCGTTGCGGCTGGCGCGATTGCCAAAAAATATCTGGCGGAAAAATTCGGCGTTGAAATTCGCGGCTGTCTGACGCAGATGGGGGATATTCCGCTGGAGATCAAAGACTGGTCTCAGGTGGAGCTTAACCCGTTCTTTTGTCCAGACCCGGATAAAATCGAAGTGCTGGACGAACTGATGCGCGGGCTGAAGAAAGAGGGCGACTCCATCGGGGCAAAAGTGACCGTTGTTGCAAGCGGCGTACCGGCGGGTCTCGGCGAACCTGTATTCGACCGTCTGGATGCCGACATCGCCCATGCGCTGATGAGCATTAACGCCGTTAAGGGCGTTGAGATTGGCGACGGTTTTGACGTTGTTGCGCTGCGCGGCAGTCAGAACCGCGATGAGATCACCAAAGAAGGT +>3 
+GTTTTCCGCCCAGGGCATGCTGATTATACCTATGAACAAAAATATGGTTTGCGTGATTATCGTGGTGGTGGACGTTCTTCTGCTCGTGAAACGGCAATGCGTGTCGCCGCAGGTGCGATTGCTAAAAAATATCTAAAAGAGAAATTAGGCATCGAAGTTCGAGGATATCTTTCTCAGCTAGGACCTATTACATGTGATCTTGTTGATTGGTCTATTGTTGAAAGCAATCCATTTTTCTGTCCTGATCCTTCACGTTTAGATGCGCTTGATGAATACATGCGTGCACTTAAAAAAGAAGGTAATTCTATTGGTGCAAAAGTCACTGTGGTTGCACAGGGTGTACCTGCTGGATTTGGTGAACCTGTCTTTGATCGATTAGATGCTGATTTAGCGCATGCTTTGATGAGTATCAATGCTGTCAAAGGTATAGAAATTGGTGATGGATTTGGTGTTGTAACATTAAAAGGTACAGAAAACCGAGATGAAATCACTAAAAAGGGA +>4 +GTTTTCCGTCCAGGCCATGCCGATTACACCTACGAACAAAAATACGGTCTGCGCGATTATCGCGGCGGCGGGCGCTCTTCCGCCCGCGAAACCGCCATGCGCGTGGCGGCAGGGGCGATTGCAAAAAAATATCTCGCCGAGAAATTTGGCATTGAGATTCGCGGCTGCCTGACCCAGATGGGTGACATTCCGCTGGAAATCAAAGACTGGTCGCAGGTCGAGCAAAATCCGTTTTTCTGCCCGGACCCGGACAAAATCGACGCGTTAGATGAACTGATGCGCGCGCTGAAAAAAGAGGGCGACTCCATCGGCGCGAAAGTCACCGTTGTTGCCAGTGGCGTCCCCGCCGGACTTGGCGAGCCGGTCTTTGACCGCCTGGATGCCGACATCGCCCATGCGCTGATGAGCATCAACGCGGTGAAAGGCGTAGAAATTGGTGATGGTTTTGACGTGGTGGCGCTGCGTGGCAGCCAGAACCGCGACGAAATCACCAAAGACGGT +>5 +GTTTTCCGTCCTGGTCACGCCGACTATACCTACGAACAAAAATATGGCTTTCGCGACTATCGCGGCGGCGGGCGTTCTTCCGCGCGTGAAACCGCGATGCGCGTGGCGGCAGGGGCAATTGCCAAAAAATATCTCCAGCAGAAATTCGGCATCGTTATCCGCGGCTGTCTGTCCCAGATGGGCGACATTCCGCTGGCAATCAAAGACTGGGATCAGGTAGAGCTCAACCCGTTCTTCTGCGCCGATGCCGACAAGCTGGACGCGCTGGATGAGCTGATGCGTGGCCTGAAAAAAGAGGGCGACTCCATTGGTGCGAAAGTCACCGTGGTGGCCGACGGCGTGCCGGCTGGCTGGGGCGAGCCGGTATTTGACCGCCTTGACGCCGACATCGCCCACGCGCTGATGAGCATCAACGCGGTGAAAGGCGTCGAAATCGGCGACGGTTTTGACGTGGTCAAGCTTCGCGGCAGCCAGAACCGCGACGAAATCACGAAGGCGGGT +>6 
+GTGTTCCGTCCGGGGCACGCGGATTACACCTACGAACAAAAATACGGCCTGCGCGACTATCGCGGCGGCGGGCGTTCATCCGCCCGTGAAACCGCCATGCGCGTCGCGGCAGGCGCTATCGCCAAAAAATATCTGGCGCAGAAATTCGGCGTGGTGATTCGCGGCTGCCTGACCCAGATGGGTGATATTCCGCTGGAAATCAAAGACTGGGATCAGGTAGAGCAAAACCCGTTCTTCTGCCCGGACCCGGATAAAATCGAGGCGCTGGATGAGCTGATGCGCGCTCTGAAAAAAGAGGGCGATTCCATCGGCGCGAAAGTCACCGTGGTGGCCGACAGCGTGCCCGCCGGGCTTGGCGAGCCGGTATTTGACCGCCTGGACGCCGATATCGCCCACGCGCTGATGAGCATTAACGCCGTGAAGGGCGTGGAAATCGGCGACGGTTTCGGCGTGGTGCAACTGCGCGGCAGCCAGAACCGCGACGAAATCACCACTGCCGGT +>7 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAGCCAGGCGCCACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGCGGCCTGCCGGAGGGCGCGGAGATTGCCGTTCAGTTGGAAGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGCCGCTTCTCGCTGTCTACGCTGCCTGCCGCCGATTTCCCGAATCTTGACGACTGGCAAAGCGAAGTTGAATTTACGCTGCCGCAGGCCACGATGAAGCGCCTGATTGAAGCGACCCAGTTTTCGATGGCCCATCAGGATGTGCGCTACTACTTAAACGGTATGCTGTTTGAAACGGAAGGTAGCGAACTGCGCACTGTTGCGACCGACGGCCACCGTCTGGCGGTGTGCTCAATGCCGCTGGAGGCGTCTTTACCTAGCCACTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGTGGCGAAAACCCGCTGCGCGTGCAG +>8 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAACCCGGCGCTACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCCTGCCGGAAGGGGCGGAAATCGCCGTTCAGCTGGAGGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGTCGCTTTTCGCTGTCTACCTTACCGGCAGCAGACTTCCCGAATCTGGATGACTGGCAAAGCGAAGTGGAATTCACGCTGCCTCAGGCGACGATGAAACGCTTGATTGAGGCCACCCAGTTTTCGATGGCCCATCAGGACGTGCGCTACTACCTGAACGGTATGTTGTTTGAAACGGAAGGAAGCGAACTGCGCACCGTCGCGACCGACGGCCACCGTCTGGCGGTCTGTTCAATGCCGCTGGAGGCCTCTTTACCGAGCCATTCAGTGATCGTACCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTTGACGGCGGTGAAAATCCACTGCGTGTACAG +>9 
+ATGGAAATGGTGGCGCGCGTTGCGTTGATTCAGCCTCATGAACCAGGCGCAACTACCGTCCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCTTGCCGGAAGGGGCTGAAATTGCCGTCCAGCTGGAAGGCGATCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTCTCGCTTTCCACGCTGCCTGCCGCCGATTTCCCTAATCTGGATGACTGGCAGAGCGAAGTCGAATTCACCCTGCCGCAGGCAACGATGAAGCGCCTGATTGAAGCCACCCAGTTCTCAATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAGACTGAAGGTGAAGAGTTGCGTACCGTCGCGACCGACGGTCACCGTCTGGCGGTCTGCTCTATGCCGGTCGGGCAATCTCTGCCTAACCATTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAGCTGATGCGTATGCTCGACGGCGGCGAAACCCCGCTGCGCGTACAG +>10 +ATGGAGATGGTGGCGCGCGTGGCGCTGATCCAGCCTCATGAACCTGGTGCGACCACCGTTCCGGCGCGTAAATTCTTCGATATTTGCCGTGGATTACCAGAAGGGGCGGAAATTGCCGTTCAACTGGAAGGCGACCGTATGCTGGTGCGTTCTGGCCGCAGCCGTTTCTCGCTGTCTACGCTGCCTGCCGCCGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTCGAATTCACCCTGCCACAGGCGACAATGAAGCGCCTGATTGAAGCCACGCAGTTTTCGATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAAACCGAAGGGGAAGAGTTGCGTACCGTGGCGACCGACGGTCACCGCCTGGCGGTCTGTTCAATGCCTGTCGGTCAGCCGTTGCCTAGCCATTCGGTGATCGTACCGCGTAAAGGTGTGATTGAACTGATGCGTATGCTCGACGGCGGCGATAACCCGCTGCGCGTGCAG +>11 +ATGGAAATGGTGGCACGCGTTGCGCTGGTTCAGCCGCACGAACCAGGGGCGACGACCGTTCCAGCGCGCAAATTCTTTGATATCTGCCGTGGTCTGCCTGAAGGCGCGGAAATTGCCGTGCAGCTGGAAGGTGAGCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTTTCGCTGTCTACCCTGCCAGCGGCGGATTTCCCGAATCTCGATGACTGGCAGAGCGAAGTCGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCGACCCAGTTTTCTATGGCGCATCAGGACGTTCGCTATTACTTAAACGGTATGCTGTTTGAAACCGAAGGTGAAGAACTGCGCACCGTGGCGACCGACGGCCACCGTCTGGCAGTCTGTTCAATGCCAATTGGTCAATCTTTGCCAAGCCATTCGGTGATCGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGCGGCGACAATCCGCTGCGCGTGCAG +>12 
+ATGGAAATGATCGCGCGCGTTACGCTGACTCAGCCGCACGACGCGGGCGCGACCACGGTTCCGGCACGTAAATTCTTTGATATTTGCCGTGGGCTGCCGGAAGGCGCTGAAATCGCAGTGCAGCTGGAGGGCGACCGCATGCTGGTGCGCTCTGGCCGCAGCCGTTTCTCCCTCTCCACGTTGCCCGCTGCGGACTTCCCGAACCTGGATGACTGGCAGAGCGAAGTTGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCCACGCAGTTCTCCATGGCGCATCAGGACGTTCGTTACTACTTAAACGGCATGCTGTTCGAAACCGAAGGTGAAGAGCTGCGTACCGTGGCGACCGACGGTCACCGTCTGGCGGTTTGTTCCATGCCGATTGGCGATTCACTGCCAAACCATTCGGTGATCGTACCGCGTAAAGGCGTAATTGAACTGATGCGTATGCTCGACGGCGGTGAAACGCCGCTGCGCGTGCAG +>13 +ATGGAGATGATCGCGCGTGTGGCGCTGTCGCTACCGCACCAGGCGGGCGCGACCACCGTGCCGGCGCGCAAATTCTTCGATATCTGCCGTGGCTTGCCGGAAGGGGCGGAAATCGCCGTTACGCTGGAAGGCGACAGAATGCTGGTGCGCTCCGGGCGCAGCCGCTTCTCGCTGTCTACGTTACCGGCGGCAGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTGGAGTTCACGCTCCCGCAGGCCACCATGAAGCGCCTGATCGAAGCGACCCAGTTCTCCATGGCCCATCAGGACGTGCGGTATTACCTGAACGGGATGCTGTTTGAAACCGAAGGCGAAGAGCTGCGCACCGTGGCGACTGACGGCCACCGTCTGGCGGTATGCGCGATGCCGGTAGGCCAACCGCTGCCAAACCATTCGGTGATTGTACCGCGTAAAGGCGTGCTGGAGCTGATGCGTATGCTCGATGGCGGCGACAGCCCGCTGCGCATTCAG +>14 +TCGGCGCTGACGGAAAACGATCTGGTCTTCGCCCTCTCGCAGCACGCCGTCACCTTTGCAGATGCCGAGCTTCAGCAACAAGGGAAAAGCTGGCCCTCCCTTCCGCGTTATTTTGCCATTGGTCGCACAACGGCGCTGGCGCTGCATACCGTTAGCGGTTTCAATATTCACTACCCTCTGGATCGGGAAATTAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGCGCGCTTATATTACGCGGCAATGGTGGCCGTGAGCTGATAGGTGAAACCCTGACAGCACGCGGAGCTGATGTCGATTTTTGTGAATGTTATCAACGCAGTGCAAAATATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCTCGTGGTGTGACCACGGTGGTTGTCACCAGCGGAGAGATGCTACAA +>15 +GCGGCGCTGGGGGAGAGCGATCTGTTGTTTGCCCTCTCGCAACACGCGGTTGCTTTTGCCCAATCACAGCTGCATCAGCAAGATCGTAAATGGCCCCGACTACCTACTTATTTCGCCATTGGACGCACCACCGCACTGGCGCTACATACCGTAAGCGGACAGAAGATTCTCTACCCGCAGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGTGCGCTGATATTACGTGGCAATGGCGGTCGTGAGCTAATTGGGGATACCCTGACGGCGCGCGGTGCTGAGGTCACTTTTTGTGAATGTTATCAACGATGCGCAATCCATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCCCGCGAGGTGACGACGGTCGTTGTTACCAGCGGTGAAATGTTGCAG +>16 
+GCGACGTTGACGGAAAACGATCTGGTTTTTGCCCTTTCACAGCACGCCGTCGCCTTTGCCCACGCCCAACTCCAGCGAGATGGTCGAAACTGGCCTGCGTCGCCGCGCTATTTCGCGATTGGTCGCACCACGGCGCTCGCCCTTCATACCGTTAGCGGGTTCGATATTCGTTATCCATTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGCGCGCTGATTTTGCGTGGCAATGGCGGTCGCGGTCGCGAACTGCTGGGCGAAACCCTGACAGCTCGCGGAGCCGAAGTCAGTTTTTGTGAATGTTATCAACGAAGTGCGAAACATTACGATGGCGCAGAAGAGGCGATGCGCTGGCACACTCGCGGCGTAACGACGCTTGTTGTCACCAGCGGCGAGATGTTGCAA +>17 +GCGGCGCTCACGGACAACGATCTGGTGTTCGCCCTCTCGCAACACGCCGTCGCCTTTGCCCACGCCCAACTGCAACAGCAGGAGCTGGACTGGCCTGTGCAACCACGCTACTTCGCCATCGGGCGCACAACGGCGCTGGCGCTGCATACCGTTAACGGATGCGATATTCGCTATCCTCTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGAGCGCTTATTTTACGGGGCAACGGCGGGCGTGAACTGTTAGGCAAAACCCTCACAGAACGCGGCGCTGAAGTCACCTTTTGTGAATGTTATCAACGCAGTGCAAAACATTACGATGGCGCGGAAGAGGCGATGCGCTGGCACTCTCGCGGCGTGACGACGATTGTTGTCACCAGCGGCGAAATGCTGCAA +>18 +GAAACACTTGGCGATAACGATCTGCTCTTTGCACTTTCTCAACATGCAGTGTCATTCGCCCATGCGCAGTTGCAACAGCAGGGGCTAAACTGGCCATCACTTCCGCATTATTTCGCTATTGGCCGTACTACCGCTCTCGCCCTGCACACCGTAAGCGGACATAAGATTCGCTATCCACAAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCGGAATTACAAAGTATTGCGGGAAAACGCGCACTTATTTTGCGCGGTAACGGCGGCCGTGAATTGATCGGTCAGACGCTGACATCACGTGGTGCCGACGTTACTTTTTGTGAATGTTATCAACGCAGTGCGAAGCATTACGATGGTGCGGAAGAAGCTATGCGCTGGCAGTCTCGCGGCGTAACAACCGTCGTTGTAACCAGCGGTGAAATGCTGCAA +>19 +CGTCTCTTGCAGGAAGGCGATCTGCTCTTTGCGCTGTCGCAGCATGCCGTGGAGTTTGCCCATGCGCAGCTGCAACAGCATGCCGTTAGCTGGCCTCACGCCCCCCGCTATTTCGCCATCGGGCGCACCACGGCGCTGGCCTTACATACCGCGAGCGGAATCGATGTTCGTTACCCGTTAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAACCATTGCCGGAAAGCGCGCGCTCATTTTGCGCGGCAACGGTGGCCGCGAACTGCTGGGCGAAACGCTGCGCGAACGCGGCGCAGACGTGACGTTTGTGGAGTGCTATCAGCGCTGTGCGAAACACTATGATGGCGCGGAAGAAGCAATGCGCTGGCACGCCCGCGGTATTAATACGCTGGTGGTCACCAGCGGTGAAATGTTACAA +>20 
+ATTGCGGGATGCCAGAAGGTGGTTCTGTGCTCGCCGCCACCCATCGCTGATGAAATCCTCTATGCGGCGCAACTGTGTGGCGTGCAGGAAATCTTTAACGTCGGCGGCGCGCAGGCGATTGCCGCTCTGGCCTTCGGCAGCGAGTCCGTACCGAAAGTGGATAAAATTTTTGGCCCCGGCAACGCCTTTGTAACCGAAGCCAAGCGTCAGGTCAGCCAGCGTCTCGACGGCGCGGCTATCGATATGCCAGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCAACACCGGATTTCGTCGCTTCTGACCTGCTCTCCCAGGCTGAGCACGGCCCGGATTCCCAGGTGATCCTGCTGACGCCGGATGCTGACATTGCCCGCAAGGTGGCGGAGGCGGTAGAACGTCAACTGGCGGAACTGCCGCGCGCGGGCACCGCCCGGCAGGCCCTGAGCGCCAGTCGTCTGATTGTGACCAAAGATTTAGCGCAGTGCGTC +>21 +ATTGCCGGATGCAAAAAAGTGGTGTTGTGCTCGCCACCGCCTATCGCGGATGAAATCCTTTACGCTGCGCAGCTGTGCGGCGTGCAGGAAATCTTCAACGTCGGCGGCGCCCAGGCCATTGCCGCTCTGGCGTTCGGCAGCGAATCCGTGCCAAAAGTGGACAAAATTTTTGGCCCCGGCAACGCGTTTGTCACCGAGGCGAAACGCCAGGTCAGCCAGCGTCTCGACGGCGCGGCAATTGATATGCCTGCCGGCCCTTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCCACGCCAGATTTCGTGGCGTCTGACCTGCTCTCTCAGGCGGAACACGGCCCGGATTCTCAGGTCATCCTGCTGACCCCGGATGCCGGTATTGCGCAGAACGTCGCAGAGGCCGTCGAACGCCAGTTAGCGGAGTTACCGCGTGCAGAAACGGCGCGTCAGGCATTAAGCGCCAGCCGTCTGATCGTGACGAAAGACTTAGCCCAGTGCGTC +>22 +ATTGCAGGCTGTAAAAAAGTGGTGTTGTGCTCTCCCCCACCTATCGCCGATGAAATTCTGTATGCTGCGCAGCTCTGCGGCGTACAGGATGTGTTTAACGTTGGGGGCGCACAAGCTATTGCCGCGCTGGCATTTGGCAGTGAATCCGTGCCGAAAGTGGACAAAATTTTTGGCCCCGGTAATGCCTTTGTGACCGAAGCCAAACGTCAGGTGAGTCAGCGTCTGGACGGCGCCGCCATCGATATGCCAGCAGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCCACGCCGGATTTCGTTGCCTCTGACTTACTCTCGCAGGCCGAACACGGCCCCGATTCCCAAGTGATCCTGCTGACGCCGGATGCCGGTATGGCCAGCCGGGTTGCTGAAGCAGTAGAACGCCAGCTTGCAGCGCTGCCACGCGCTGAAACCGCGCGGCAGGCGTTAAGCGCCAGTCGTCTGATTGTCACCCGCTCCCTTGCGCAATGCGTA +>23 
+ATTGCGGGCTGTAAAAAAGTGGTGCTGTGCTCACCGCCGCCGATTGCCGATGAGATCCTTTACGCGGCGCAGCTGTGCGGTGTGCAGGACGTGTTTAACGTCGGCGGCGCACAGGCCATTGCCGCGCTGGCGTTTGGTACAGAATCCGTGCCGAAAGTGGACAAAATCTTCGGGCCAGGTAACGCCTTTGTCACCGAGGCAAAACGTCAGGTGAGCCAGCGTCTGGACGGTGCGGCGATCGATATGCCCGCAGGCCCGTCGGAAGTGCTGGTGATTGCTGACAGCGGCGCAACGCCGGATTTCGTGGCTTCTGATTTGCTCTCCCAGGCTGAACACGGCCCGGACTCTCAGGTGATTTTACTGACGCCCGCTGCTGATATGGCGCGTCGCGTAGCCGAAGCTGTCGAACGCCAGCTGGCAGAACTGCCGCGAGCTGAAACCGCCCGCCAGGCACTGAACGCCAGCCGCCTGATCGTGACTAAAGATTTAGCGCAGTGCGTG +>24 +ATTGCCGGTTGTCAGAAGGTGGTGCTCTGCTCTCCTCCACCGATCGCCGATGAGATCCTGTACGCGGCGAAGCTGTGCGGCGTGCAGGCGATCTATAAAGTGGGCGGTGCGCAGGCGATTTCTGCCCTGGCGTTCGGAACAGTATCCATTCCTAAGGTCGACAAAATCTTTGGCCCGGGCAATGCCTACGTGACCGAGGCGAAGCGCCAGGTCAGCCAGCGTCTGGACGGCGCGGCGATTGATATGCCTGCCGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCTACACCGGATTTCGTGGCCTCTGACCTGCTCTCGCAGGCCGAGCACGGCCCTGACTCGCAGGTGATTTTACTGACGCCAGATGCCGACATGGCAAAACGCGTGGGCGACGCCGTTGAGCGTCAGCTGGCTGACCTGCCGCGGGCGGAAACGGCGCGTCAGGCGCTATCCGCCAGCCGCCTGATTGTGGCCCGCGATCTTGACCAGTGCATC +>25 +ATCGCCGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTACGCCGCGCAACTCTGTGGCGTGAAAGAAGTGTTTAACGTGGGTGGCGCACAGGCCATTGCCGCGCTGGCGCTGGGCACGGAGTCTATTCCAAAAGTCGATAAAATCTTTGGGCCGGGCAACGCCTATGTGACCGAAGCCAAGCGCCAGGTCAGCCAGCGTCTTGACGGCGCGGCAATCGATATGCCCGCCGGACCGTCCGAAGTATTGGTTATCGCCGACAGCGGCGCAACGCCGGATTTTGTCGCCTCCGACCTGCTTTCTCAGGCCGAGCACGGCCCAGACTCGCAGGTGATCCTGCTGACGCCGGACGCTAAGCTTGCCGAGGGCGTGGCCGAAGCCGTTGAACGCCAGCTCGCCGAGCTGTCCCGCGCCGACACCGCGCGTCAGGCGCTCTCCGCCAGCCGTTTAATCGTAGCGAAAGATCTGGCGCAGTGCGTG +>26 
+ATCGCGGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTATGCGGCGCGTTTGTGCGGGGTACAGCAGGTCTATCAGGTGGGCGGCGCTCAGGCCATCGCGGCGCTGGCGTTTGGCACCGAGACCGTACCCAAAGTGGACAAAATCTTCGGGCCGGGCAATGCGTTTGTCACCGAAGCCAAACGTCAGGTCAGCCAGCGGCTGGATGGCGCGGCGATTGATATGCCTGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGATAGCGGCGCGACCACGGATTTCGTGGCCTCGGATTTGCTGTCCCAGGCGGAACACGGCCCGGATTCGCAGGTGATCCTGCTGACACCGGACAGCGCCATGGCGCAGGCGGTGGCCGACGCGGTTGAGCGTCAACTCGCCGAACTGCCGCGCGCGGAAACAGCTCGCCAGGCGCTGGCGGAAAGCCGCCTGATTGTGGCGCGCGATTTAGCGCAGTGCGTG +>27 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAATTTTTGACATTCTGGATATTCCGCACCATGTCGAAGTGGTTTCTGCTCACCGTACCCCCGATAAACTGTTCAGCTTTGCCGAAAATGCTGAAGAAAACGGCTTTCAGGTAATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCAGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTTGGCGTTCCGGTACAAAGCGCTGCGCTAAGCGGTGTGGACAGTCTCTATTCTATTGTACAGATGCCGCGCGGTATTCCGGTTGGCACACTGGCCATCGGCAAAGCTGGCGCCGCTAACGCGGCGCTGCTGGCGGCGCAAATTCTGGCCACCCACGATAACGCACTGCATCAGCGCCTTCGCGAC +>28 +AGCGACTGGACTACCATGCAATTCGCCGCCGAAATTTTTGAAATTCTGGATGTTCCGCACCATGTAGAAGTGGTTTCCGCCCATCGAACCCCTGATAAACTGTTCAGCTTCGCCGAAACGGCGGAAGAGAACGGATATCACGTGATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACATTGGTGCCGGTACTCGGCGTTCCGGTACAAAGCGCAGCATTAAGCGGTGTGGATAGCCTTTACTCCATTGTTCAGATGCCGCGTGGCATTCCGGTGGGTACACTGGCTATCGGCAAAGCCGGGGCTGCGAACGCCGCGCTGCTGGCAGCGCAAATTTTGGCCACACACGATAATGCGCTGCACCAGCGCCTGAGCAAC +>29 +AGCGACTGGGCTACCATGCAGTTCGCCGCAGAAATCCTCGATATTCTGAACGTACCTCACCATGTTGAAGTGGTTTCCGCCCACCGCACGCCCGATAAACTGTTCAGCTTCGCCGAAGACGCCGAAAGCAACGGTTATCAGGTGATTATTGCCGGTGCCGGCGGCGCTGCGCACTTACCCGGAATGATTGCCGCCAAAACGCTGGTCCCGGTATTAGGTGTACCCGTCCAGAGCGCCGCATTAAGCGGTGTCGATAGCCTCTACTCCATCGTGCAGATGCCGCGCGGCATTCCGGTCGGTACGCTGGCGATCGGTAAAGCCGGTGCCGCTAACGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>30 
+AGCGACTGGGCTACCATGCAGTTCGCCGTCGAAATCTTCGAAATCCTGAATGTCCCGCACCACGTTGAAGTGGTTTCTGCTCACCGCACCCCCGATAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTATCAGGTGATTATTGCGGGCGCAGGCGGCGCAGCGCACCTGCCAGGCATGATTGCCGCCAAAACGCTGGTGCCGGTGCTGGGCGTGCCAGTACAGAGCGCCGCACTGAGCGGTGTCGATAGCCTCTACTCCATCGTACAAATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATTGGTAAAGCTGGCGCGGCAAACGCGGCATTACTGGCAGCACAAATTCTCGCGACTCACGATAAAGAGCTACACCAGCGTCTGAATGGC +>31 +AGCGACTGGGCTACCATGCAGTTTGCCGCCGAAATCTTCGATATCCTGAACGTTCCACACCACGTTGAAGTGGTTTCCGCACACCGCACCCCCGATAAGCTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAAGGGTTATCAGGTGATTATTGCCGGTGCTGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTGGGCGTGCCGGTGCAAAGCGCTGCGCTGAGCGGCGTGGACAGCCTCTACTCTATCGTCCAGATGCCGCGCGGCATTCCGGTCGGCACGCTGGCGATCGGCAAAGCGGGCGCGGCGAACGCGGCGTTACTGGCAGCGCAAATTCTGGCGACACACGATAAAGACCTGCGCCAACGTCTGGCGGAC +>32 +AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTCGAAATGCTGGACGTTCCGCACCATGTTGAAGTCGTCTCAGCCCACCGTACCCCTGATAAACTGTTCAGCTTCGCCGAAAGCGCTGAAGAAAACGGTTATCAGGTTATTATTGCGGGTGCTGGCGGTGCAGCGCATCTGCCGGGCATGATTGCAGCGAAAACGCTGGTCCCCGTGTTAGGCGTTCCGGTACAAAGCGCAGCGTTGAGCGGCGTAGATAGCCTCTACTCAATCGTGCAGATGCCACGCGGCATCCCCGTGGGTACGCTGGCGATTGGGAAAGCGGGTGCGGCAAATGCGGCCCTGCTGGCAGCACAAATTCTGGCAACACACGACAAAGCATTACATCAGCGTCTGAGCGAC +>33 +AGTGACTGGGCAACCATGTCTCATGCCGCAGATGTATTAGATACACTACAAATTCCTTACCATGTTGAGATTGTCTCTGCACACCGAACCCCTGATAAGTTATTTAGTTTTGCTGAAAAAGCAAAAAGTAATGGCTTTGATGTCATTATTGCTGGTGCAGGAGGAGCTGCCCATTTACCAGGAATGCTTGCAGCTAAAACGTTAGTACCCGTATTTGGTGTTCCTGTTCAAAGTGCGACATTAAGCGGTGTTGATAGCCTCTATTCAATCGTACAAATGCCAAAAGGTATCCCTGTAGGAACCTTAGCGATTGGTAAAGCAGGGGCTGCCAATGCGGCTTTATTAGCGGCTCAAGTTTTAGCGTTACATTCTCCTGCTATTTTAGATGCATTGACTGCA +>34 
+AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTTGAAATCCTGAATGTTCCGCACCACGTCGAAGTGGTTTCCGCACACCGTACCCCGGACAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTACGAGGTGATCATTGCCGGTGCGGGCGGCGCAGCACATCTGCCGGGCATGATTGCCGCCAAAACGCTGGTGCCGGTACTGGGTGTTCCCGTGCAAAGCGCCGCGTTAAGCGGGGTGGATAGCCTTTACTCTATTGTCCAGATGCCGCGCGGTATTCCTGTCGGTACCCTGGCGATTGGTAAAGCAGGTGCGGCAAATGCCGCCCTGCTGGCCGCGCAGATCCTGGCGACGCATGATAAAGATTTGCACCAGCGTCTGGCGGAG +>35 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAACGGCGGAAGAGAACGGATATCAAGTGATTATTGCCGGCGCGGGCGGCGCGGCGCACCTGCCGGGAATGATTGCGGCAAAAACGCTGGTCCCGGTACTCGGCGTGCCGGTACAAAGCGCTGCGCTAAGCGGCGTGGATAGCCTTTACTCCATTGTGCAGATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATCGGTAAAGCCGGTGCGGCTAATGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>36 +AGCGACTGGGCCACCATGCAGCATGCCGCTGAAATTCTTGATGCCCTTGATGTTCCTTACCATGTTGAAGTGGTTTCCGCTCACCGCACGCCTGATAAGCTTTTCAGCTTTGCTGAATCCGCGCAGCACAACGGTTATCAGGTGATTATTGCTGGCGCAGGCGGTGCGGCGCATCTGCCGGGCATGATCGCCGCGAAAACCCTGGTGCCGGTATTAGGCGTGCCGGTGCAAAGCGCGGCCCTGAGCGGCGTGGACAGCCTCTACTCTATCGTGCAAATGCCGCGCGGCATTCCGGTAGGGACGCTGGCGATCGGCAAAGCGGGTGCTGCAAACGCCGCACTGCTGGCGGCGCAGATCCTCGCCCAGCATGACGATGCGCTACTGGCGCGTCTGGCGGCA +>37 +AAACGCTTCCTGAACGAACTGACCGCCGCTGAAGGGCTGGAACGTTATCTGGGCGCCAAATTCCCGGGTGCGAAACGTTTCTCGCTCGAGGGGGGAGATGCGCTGATACCTATGCTGAAAGAGATGGTTCGCCATGCGGGTAACAGCGGCACTCGCGAAGTGGTGCTGGGGATGGCGCACCGCGGTCGTCTGAACGTGCTGATCAACGTACTGGGTAAAAAACCGCAGGATCTGTTCGACGAGTTTGCCGGTAAACATAAAGAACATCTGGGTACCGGCGACGTGAAGTATCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGTCTGGTTCACCTGGCGCTGGCGTTTAACCCATCGCATCTGGAAATTGTGAGCCCGGTGGTGATGGGCTCCGTGCGCGCCCGTCTGGACCGACTGGACGAACCGAGCAGTAATAAAGTGCTGCCGATCACTATTCACGGCGACGCCGCGGTGACCGGCCAGGGCGTGGTTCAG +>38 
+AAACGCTTCCTGAACGAACTGACCGCTGCAGAAGGGCTGGAACGTTATCTGGGGGCAAAATTCCCTGGCGCGAAACGTTTTTCGCTGGAAGGCGGCGATGCGTTAATTCCGATGCTCAAAGAGATGGTCCGCCATGCGGGCAACAGCGGCACCCGCGAAGTGGTGTTGGGAATGGCGCACCGTGGTCGCCTGAACGTACTGGTCAACGTGCTGGGTAAAAAACCTCAGGATCTGTTTGACGAGTTTGCCGGTAAACATAAAGAACATTTGGGCACCGGCGACGTGAAGTACCATATGGGTTTCTCGTCGGATATCGAAACCGAAGGCGGACTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTCAGCCCGGTAGTGATGGGGTCTGTGCGCGCACGTCTCGACCGGCTCGACGAACCGAGCAGCAACAAAGTGTTGCCAATCACCATTCATGGTGATGCAGCAGTTACCGGGCAGGGCGTGGTTCAG +>39 +AAACGCTTCTTAAGCGAACTGACCGCCGCTGAAGGCCTTGAACGTTACCTCGGCGCAAAATTCCCTGGCGCAAAACGCTTCTCGCTGGAAGGCGGTGACGCGTTAATCCCGATGCTTAAAGAGATGATCCGCCACGCTGGCAACAGCGGCACCCGCGAAGTGGTTCTCGGGATGGCGCACCGTGGTCGTCTGAACGTGCTGGTGAACGTGCTGGGTAAAAAACCGCAAGACTTGTTCGACGAGTTCGCCGGTAAACATAAAGAACACCTCGGCACGGGTGACGTGAAATACCACATGGGCTTCTCGTCTGACTTCCAGACCGATGGCGGCCTGGTGCACCTGGCGCTGGCGTTTAACCCGTCTCACCTTGAGATTGTAAGCCCGGTAGTTATCGGTTCTGTTCGTGCCCGTCTGGACAGACTTGATGAGCCGAGCAGCAACAAAGTGCTGCCAATCACCATCCACGGTGACGCCGCAGTGACCGGGCAGGGTGTGGTTCAG +>40 +AAACGCTTCCTCAGCGAACTGACTGCAGCGGAAGGTCTGGAACGCTACCTGGGCGCGAAATTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGTGATGCGTTAATCCCAATGCTCAAAGAGATGATCCGCCACGCCGGTAACAGCGGTACCCGTGAAGTGGTACTGGGTATGGCGCACCGTGGTCGTCTGAACGTCCTGGTTAACGTGCTGGGTAAAAAGCCGCAGGATCTATTCGACGAATTTGCGGGCAAACATAAAGAACACCTCGGTACCGGTGACGTGAAGTACCACATGGGCTTCTCATCGGATATCGAAACCGAAGGCGGTCTGGTGCATCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTTATCGGTTCCGTACGTGCACGCTTGGATCGTCTGGACGAGCCGAGCAGCAATAAAGTGCTGCCAATCACTATTCATGGTGATGCGGCAGTAACCGGGCAAGGCGTGGTTCAG +>41 
+CGTACTTTCCTTGAAGAGCTGACTGCCGCTGAAGGTTTAGAGCGCTATCTTGGTGCGAAATTCCCTGGTGCTAAACGTTTCTCTCTCGAAGGGGGGGATGCCTTAGTTCCGATGACCAAAGAGATGATCCGTCACGCGGGTGCCAGTGGCATGCGTGAAGTGGTGATTGGGATGGCGCACCGCGGTCGCTTGAACATGCTGGTCAACGTTCTGGGTAAAAAACCGCAAGATCTGTTTGATGAGTTTGCCGGTAAACATGGCGAAGGCTGGGGCACAGGTGATGTGAAATATCACCAAGGTTTCTCCGCTGACTTTGCGACACCGGGCGGTGATGTTCACTTAGCACTGGCTTTCAACCCATCGCATCTTGAGATTGTGAACCCTGTTGTGATGGGTTCAGTTCGCGCGCGTCAAGACCGCCTAGGTGATGAAGATGGCAGTAAAGTGCTACCTATCACTATCCATGGTGACTCTGCGATTGCCGGACAAGGTGTGGTGGCT +>42 +AAACGCTTCCTGAGCGAGCTGACCGCAGCCGAAGGCCTTGAGCGCTACCTGGGCGCGAAGTTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGCGACGCGCTGATCCCGATGCTGAAAGAGATGATTCGCCACGCGGGCAACAGCGGCACGCGTGAAGTGGTGCTGGGTATGGCGCACCGCGGTCGTCTTAACGTGCTGGTTAACGTGCTGGGTAAAAAACCGCAGGACCTGTTCGACGAGTTCGCGGGCAAACACAAAGAACACCTTGGCACCGGCGACGTGAAGTACCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGCCTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTAATTGGTTCGGTACGTGCCCGTCTGGATCGGCTGGACGAGCCGAGCAGCAACAAAGTACTGCCGATCACCATTCACGGCGACGCCGCGGTGACCGGTCAGGGCGTGGTTCAG +>43 +GTGCTGGGCCGTAATGGTTCCGACTATTCCGCCGCCGTGCTGGCCGCCTGTTTACGCGCTGACTGCTGTGAAATCTGGACTGACGTCGATGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGACTGCTGAAATCGATGTCCTACCAGGAAGCGATGGAACTCTCTTACTTCGGCGCCAAAGTCCTTCACCCTCGCACCATAACGCCTATCGCCCAGTTCCAGATCCCCTGTCTGATTAAAAATACCGGTAATCCGCAGGCGCCAGGAACGCTGATCGGCGCGTCCAGCGACGATGATAATCTGCCGGTTAAAGGGATCTCTAACCTTAACAACATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGGATGATTGGGATGGCGGCGCGTGTTTTCGCCGCCATGTCTCGCGCCGGGATCTCGGTGGTGCTCATTACCCAGTCCTCCTCTGAGTACAGCATCAGCTTCTGTGTGCCGCAGAGTGACTGC +>44 
+GTGCTGGGGCGTAACGGTTCCGACTATTCCGCTGCGGTACTGGCCGCCTGTTTACGCGCCGACTGTTGCGAAATCTGGACGGACGTTGACGGTGTGTATACCTGCGACCCGCGCCAGGTGCCGGATGCCAGACTGCTGAAGTCAATGTCCTATCAGGAAGCGATGGAACTTTCCTACTTCGGCGCCAAAGTGCTTCACCCGCGTACCATTACTCCCATCGCTCAATTCCAGATCCCATGTCTGATAAAAAATACCGGTAATCCGCAAGCGCCGGGCACGCTGATTGGCGCCAACAGCGATGAAGACGGGCTACCGGTAAAAGGCATCTCGAACCTCAATAATATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGCATGGTCGGGATGGCGGCGCGCGTGTTCGCCACCATGTCGCGTGCCGGGATTTCGGTAGTGCTGATCACCCAATCCTCTTCGGAGTACAGCATCAGCTTCTGCGTGCCGCCAAAGCGATGC +>45 +GTGCTGGGCCGTAACGGCTCCGATTATTCCGCCGCCGTACTGGCCGCCTGTTTACGCGCTGACTGTTGTGAAATCTGGACTGACGTCGACGGCGTGTATACCTGCGACCCGCGTCAGGTGCCAGACGCCAGGCTGCTGAAGTCGATGTCTTATCAGGAAGCAATGGAGCTTTCTTACTTCGGCGCTAAAGTACTACATCCGCGCACTATTACTCCTATTGCCCAGTTCCAGATCCCTTGTCTGATTAAAAATACCGGCAATCCACAAGCGCCCGGTACGCTGATCGGCGCTGCCAGCGACGATGATGCTCTGCCGGTTAAAGGGATTTCTCACCTTAACAACATGGCGATGTTTAGTGTCTCCGGTCCGGGGATGAAAGGCATGGTGGGTATGGCGGCGCGCGTTTTTGCCGCTATGTCACGTGCGGGAATCTCGGTGGTGTTGATCACGCAATCTTCATCTGAATACAGCATCAGCTTCTGCGTGCCGCAGAGCGACTGC +>46 +GTGCTGGGCCGCAACGGTTCTGATTACTCCGCTGCGGTGTTGGCTGCCTGCTTACGCGCCGACTGTTGTGAGATCTGGACTGACGTTGACGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGGTTGCTGAAGTCGATGTCCTATCAGGAGGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTCCTTCATCCTCGCACCATCACCCCCATTGCCCAGTTCCAAATCCCATGCCTGATTAAAAACACCGGAAACCCGCAGGCCCCTGGTACGCTGATCGGCGCCAGCGTGGATGAAGACGAACTGCCGGTGAAAGGGATCTCGAACCTGAACAATATGGCGATGTTCAGCGTTTCCGGCCCAGGAATGAAAGGGATGATCGGGATGGCGGCGCGCGTCTTCGCGGCAATGTCCCGCGCGGGGATCTCCGTGGTGCTGATCACGCAATCCTCTTCTGAATACAGCATCAGTTTCTGCGTACCGCAGGGCGACTGC +>47 
+GTGTTGGGGCGCAATGGCTCTGACTACTCTGCCGCTGTGCTGGCTGCCTGTTTACGCGCGGACTGTTGTGAGATCTGGACCGATGTCGACGGCGTATATACCTGCGATCCGCGCCAGGTACCCGATGCCCGACTGCTGAAGTCGATGTCTTATCAGGAAGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTTCTGCATCCGCGCACCATTACCCCAATTGCCCAGTTCCAGATCCCGTGCCTGATTAAAAATACCGGCAATCCACAAGCGCCTGGCACGTTGATCGGCGCCAGCAGTGATGAAGACGATTTGCCGGTAAAAGGTATTTCTAACCTCAATAACATGGCGATGTTTAGCGTCTCCGGCCCTGGAATGAAAGGCATGGTAGGCATGGCGGCGCGCGTTTTTGCCGCGATGTCGCGTGCGGGCATCTCGGTGGTGCTGATCACGCAGTCTTCTTCTGAATACAGCATCAGCTTCTGCGTTCCGCAGGGCGACTGC +>48 +GTATTAGGTCGCAATGGTTCAGACTACTCAGCTGCAGTATTAGCAGCCTGTTTACGTGCTAAATGCTGTGAAATTTGGACTGATGTTGACGGTGTTTATACTTGTGATCCACGTTTAGTGCCTGATGCACGTTTGTTAAAAGGCATGTCATATCAAGAGGCAATGGAACTGTCTTACTTTGGTGCCAAGGTACTTCATCCTCGTACAATTGCGCCTATTGCCCAATTCCAAATACCTTGTTTAATTAAAAATACGGGCAATCCAGATGCGCCGGGTACCTTGATTGGTGATGGTCAAAAAGATGAGAGCACACCTGTTAAAGGAATAACTAACCTTAATAATATGGCAATGATCAACGTATCTGGGCCTGGAATGAAAGGAATGGTAGGAATGGCGGCTCGCGTGTTCTCGGTAATGTCGAGAGCGGGGATTTCAGTTGTTCTAATCACACAGTCTTCTTCTGAATACAGCATTAGTTTTTGTGTGCCACAAAAAGAGCTG +>49 +GTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGCTGGCTGCCTGTTTACGCGCCGATTGTTGCGAGATTTGGACAGACGTTGACGGGGTCTATACCTGCGACCCGCGTCAGGTGCCCGATGCGAGGTTGTTGAAGTCGATGTCCTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCCCGCACCATTACCCCCATCGCCCAGTTCCAGATCCCTTGCCTGATTAAAAATACCGGAAATCCTCAAGCACCAGGTACGCTCATTGGTGCCAGCCGTGATGAAGACGAATTACCGGTCAAGGGCATTTCCAATCTGAATAACATGGCAATGTTCAGCGTTTCCGGCCCGGGGATGAAAGGAATGGTTGGCATGGCGGCGCGCGTCTTTGCAGCGATGTCACGCGCCCGTATTTCCGTGGTGCTGATTACGCAATCATCTTCCGAATACAGTATCAGTTTCTGCGTTCCACAAAGCGACTGT +>50 
+GTGCTCGGGCGCAACGGCTCCGATTATTCCGCAGCGGTACTGGCAGCGTGTTTACGCGCCGATTGTTGCGAGATCTGGACTGATGTCGATGGTGTCTATACCTGCGACCCACGTCAGGTACCGGATGCCCGATTACTTAAGTCGATGTCGTACCAGGAGGCTATGGAACTCTCCTATTTCGGCGCCAAAGTCCTCCATCCTCGAACCATCACTCCCATCGCCCAGTTCCAGATTCCCTGCCTGATAAAAAATACCGGAAACCCGCAAGCACCAGGAACGCTGATTGGCGCCAGCCGCGACGAAGATGATCTGCCGGTGAAGGGCATTTCAAATCTCAATAATATGGCGATGTTCAGCGTCTCCGGGCCGGGGATGAAGGGAATGGTCGGCATGGCTGCTCGCGTGTTTGCGGCAATGTCTCGCTCAGGAATTTCGGTAGTCCTGATTACGCAATCCTCCTCTGAGTACAGCATTAGCTTCTGTGTACCGCAGGCTGACTGT +>51 +GTGCTGGGGCGTAACGGCTCTGACTACTCCGCCGCCGTGCTGGCGGCCTGCTTACGCGCGGACTGCTGTGAGATCTGGACTGACGTCGACGGCGTTTATACCTGCGATCCGCGCCAGGTACCGGACGCCAGGCTGCTGAAGTCGATGTCGTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCGCGTACCATCTCCCCGATTGCCCAGTTCCAAATCCCTTGCCTGATTAAGAATACCGGTAACCCTCAGGCGCCGGGCACGCTGATTGGCGCCAGCGCGGATGAAGATGAACTGCCGGTGAAAGGCATTTCTAACCTCAATAACATGGCGATGTTCAGCGTCTCCGGCCCGGGGATGAAGGGCATGGTCGGCATGGCGGCACGCGTATTTGCCGCTATGTCCCGCAACGGGATCTCCGTGGTGCTGATCACGCAGTCTTCTTCCGAATACAGCATCAGCTTCTGCGTTCCGCAGGGTGATTGC +>52 +GTATTAGGCCGTAACGGTTCCGACTACTCCGCCGCCGTGCTGGCCGCGTGTTTGCGCGCCGACTGTTGTGAGATCTGGACTGACGTCGACGGCGTCTATACCTGCGACCCGCGCCAGGTGCCGGACGCCAGGCTGCTGAAGTCGATGTCGTATCAGGAAGCCATGGAACTCTCCTACTTCGGCGCTAAAGTTCTCCACCCCCGCACCATTGCCCCCATCGCCCAGTTCCAAATCCCCTGTCTGATCAAAAACACTGGTAACCCGCAAGCGCCAGGCACCCTGATCGGTGCCAGCAGCGATGAAGACGGCCTGCCGGTGAAGGGCATCAGTAACCTGAATAATATGGCGATGTTCAGCGTCTCTGGTCCGGGCATGAAAGGCATGGTGGGAATGGCGGCGCGCGTGTTCGCGGCGATGTCCCGTGCGGGCATCTCGGTGGTGCTGATCACCCAATCGTCTTCTGAATACAGCATCAGCTTCTGCGTGCCGCAGGCCGACAGC diff --git a/locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.ndb b/locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.ndb new file mode 100644 index 0000000000000000000000000000000000000000..dfa7d2e267e27fdcac41d817c56044823912df6b GIT binary patch literal 20480 zcmeI%u}*_f6adgGnn+wQ#(ywwF7Ez{QB0atSk$?{)y4En{D3a@50nD0P>05(Iy5;9 zy!&{M%k5#hB_bVp8+{VEZl~uF`CS|BJbm@Y&P}7mw14ZFqjezx0t5&U 
zAV7cs0RjXF5FoI>z|eR9NB=)$AEK`2tS>!{C;j~AzWslp33bORK$56a26G33lxW~VPj%q;tg;sy%comRr(uEevFOhThS)()6H3s z6>seN|G!V$0yw3B1Wqd;f-?%p;H&~dIH!OV&MP2> z3kt~Lq5^`rq<|zYBhZn=6$K=5RRKv{Q$P~e6_CUY1tf7(0ZD8qAckiNjy+M5)T!S#HIq0c%*}tG zZ^sMi6f4nz(xlPXYgh@Z&oHb@XD#FU)&IfI*Y>wVT&>YxPr&nmV7~6!xPjbe;7A0gSS#c1h+7YcT!^%w=s_o zQs4wev51dSQy0duf=|-XQ@Dd?*pZH%#)SS9@{s-)!w2|D{!^f4PZ_)9Tf|rLTNJwk#BTooH4p9;?y@ZRlXt;Imt1zmfvc{$?uMIgx$TZacinU3 wz6Tz9Z*opT}eelsIpMCMwH{bp6(=WgMapr&Q3tcn?%>V!Z literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.nsq b/locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.nsq new file mode 100644 index 0000000000000000000000000000000000000000..7fc5e3b8e6fafd8419a1bdda10966df4ce2396be GIT binary patch literal 6297 zcmZ9Mk3W-p|Hn6KrJ`R5o!jXgnzNjv&Ns@A(o}>_DwShXLu_kiQ(xyW9GXe`!NxHA zk)0e|rLs<^qAApUQm1oQ;>2k>gtg6b7dlbPee``_*Vg%-$Ndjn&pn^7_h&fU?yf3w z)y>@g_lZg&z0+@j-e8eXQ193s( zCt37>1RIlK%0=ZTQ{iTk#0hMf?Y1WEO+e(QDM92EsP-xy!p*!m9%CkAXH}HyS zDnQM^k(N%2$!BfaKI=wncT_Do{_gI*p>sGHGN5x(vKc%)LRK0R+F#`;{hW>R`#{;VsepQ=fwXjqR%F{|vn;5!`(c&qz}?-0p>s?bvI9KDzLYIY zjBWrrxM%LeO(^@}j`)3MXmc}(7TY2#76IH1`;o{kwn#>E-ZN}=i{q!fggrAM#JEh<0j{B0I+gw}{cx7nn>c1CW_g;XD z7TIpXEyII*Eji)l?l{;zL-q$#k;KYoFkyEkBB>J><5s3c3#Lj3*D5lgyn11GCX}z2 zG<;cg_rwF8hVG7_ba@}p#h5$LiF!sIxC4EN zQHaskxbkKxPXZl26=bl`gWYL2zVN7*D{meAS{kvGRCa(zV+vlsxu;jqY0H>L%qg3P zNJZ5uO|vOknyr0)DLVl^kLcd6dLpT>qqVSrZth^ZcYgrBIRF1%O?UIE04OnSn1nI zhcJ2wxc`klU3ePkJd|F6fIGdXGe&=BD+hH~Tg$TxE#+<1B}zl)D{08|WwQ8`NG1i{ zy{V5rizzVk!iKz$!tSX1J1CzKhQRJ=7WXl5KkQHJ!*;(9^yU*|*zVJXSC(Xy03CL( z0s6ML51#1lfd04L*@yhF-EE8D>iAzvLtN`*+t1TNuD^Z*?kd_WmO$$RyXThz9d+;O z4KY&<1zovVdAPd2=pmbXBT%}f{3D=$oMCZqL|U68qTeQT=Vmwu-KPv%iw!@9}5YY!4NxOSIe$xI?yRovV3!jW_SQsllP#(6je>&y{Z-=uku zb{ezgpnk(f)fP{WWQX$jw%hI6tyLw#C8iT!#@JOY5vMyzicZ`A4MyRy3dfo@APs~2 z-&Q*fAWY5RrGqFeL2keGJ*-QncHyL4i{uDKBsf_LYSwCw?eJtY#OgzK{v+(jaGZOE zk|A%`it9^4I(zfGD*^Wivt`r9MDiua 
z66B`&TI9yzoXKFKOk}oEutE;G^j(FH(=0UiAkXhJ-77THU2ki*)LA>;sFt;}{Gj9C z)o^=R?PF(x5Trlr7m8EI5~jTWQmnP(Ru_|ECPG$~^eT8R2_u&x*#cuRPO!;T=+c*N z{;+r2)2mw<5s589o@e&EmvxMnwrksKFSJ!^=^LTr3Ng!X-g}sH-p$$QXq(#2iW`Wu z|4mP)>wqs$6Mqfsn5Yp#N5xDpvP#mcNS+sy@&j}P825=pC#1*Au8&Am1$pYP zsVYKtQQJ|o5V`5wFR4dQJ{n1Jy7nFh7cxDNX2WD{H7kPD(zuLgaju3qru?HTDKCzn z=wxXGoBYAd6U@pL?Q>Y_OXz6xaDhqWQ1A0umD24ySR&g!_u0i+O}ud1gM=QooFtZ} z*G@?GcDD(dR?M9$XA@%o?QFN6>m3%>7>}^;73!DPar-sHMbZWA;^68D?dyEwH{tw} z+I{2UIy#?Gty-v0^hS17O$2~%UfF+F_W z{Mvo5!+)jocU4O-8bTK*9*c=%EGHgsQ%dj%m$Zf$8N+{k*yh8fC_Ow2qjzmjWr%G1 z?q^8S70KZNt%SFAC3fvnR?U>8zgsWB=p2XG7tVGRuJ`e<#wvumrBLr#kGhM&y^K*k zVNNf&ofaNFzczY&J?1XGSiAJBQMh>t!|SPB@2bKx*`#Zjdob%O!v-ZjFbSn+RXS4> z^dgVgTWoND5x%$;+)Kcng-l&og3;|KYP|`u;Ev;Z9|gLaT~w$~tmU2rx(dnz-Sk?F zmIwFraNU3S43=so!c{I6X2mhQYI5~56`sK$xf3F({Ko}Y`P@L-WT>|~z4Ev12@H`3 z{T5q@mQP5vdB%a=`@TXZazA}I{?CSa_EY6`4!e2IBq3J53So<({9o=TV0Tk`z7gC@ zYWZmx-9x&t2Cu9;95cxEI+d?iDA4jJx`94f%UnIMxin}M&EhUa-EH^Wq86t&@oZ+d z5}uhX?wKXu|2~duSYba^TSwRp<=12M0fcQ|sAp7h<1P;0L$8ieJ*E97-}oA(hXNh0 zuIhxB{+O~>7}L-2VjO)=mCk2!NtNA^F{6)(Hmi}%%G_1f@>cqNwsKN&F3dYM{29BF zv_%2sOZGaqQJUsK`8o%;tnL7Q+t>* z$eqDJZyxW(AgL5K;EpiSo6jY>pK^SIVbi$jg6zfUe8GdXKHrun8VHFhTHex}@r z$Zh+|b|w%um4;Pl3yAIoR)%e#2pB*6v{HA(?a~2bBATF>nOp74d(V(!j6e3&AJksAqmfo%@vF(+8onYfNKddqD6vk+k zM%z!Uq!Y&HqYUbNVA!D^zz|wIROlX-R%c+}X{DdL#Q z+t8{{wlTqGkvymodG7%XTh(FM@{qpk?~k5Dv+MinE46{!y% z&m&X3{jxq;bO%~3Q*ASR*G{aQ{U7Dc*Ywo?h_)7#TK9;ypMpKcOG4mwsq>R}RbiWB zd$_25%^Zayu>|!%H-}d0aUMm^995{{u700{R-Cn294B-SZv{nPZmgAi&}E4_n{_2& zjmU9~0q?_drasm)U1Uvv8O~I1c?vx2Of4q_S#+*ok2IbSHmQ`OJGHA%1`b9w>FT-- z4DL{Lri-)Z57#yNq7PoN4^!37Wpz!jaxHXKC`PZtjb}3PtrMBE*rJrV(MHOVVQNHD z5V3?R9Bw&jD&aNI3AgJBMxb{VjSN`mTb~63y=~>$*WBt=_&B7%LXXleI=QMd>POv~ zK0_k+5iJeq#gwahd&gJ9qOj%8H5U4}MAV&>Ll{ew13eM{U0UWW&fygMXyf%GI%-I! 
zh29J9Jk(u{(TfYfU5mO0_k^ey3))ckf$@(LkP0>C9@V>OFz}yIKmNh#o6vKILUa73 zK-U2MA?GmF$GPS#=DvqGqd>CHJCkrAQ!W@!Vp`~;6#H%94s_ZL;)NLDu=_a+{kQWd zeW|3VJwDG|htb>A;12XHn0upo3tAp?e;L*E2dDeiQ`XSqOc$U-dHWB*o&BkE4GHMU z2V;d^1ya8EOJZt{Mlhaf#J8rW%{n8BT^>CE^q7z&y-%r9I4s6?&zWYdC%lI8#YH1J zwEP!wr_O9t2MF#tdcUR)OvO7RS4KA7asTcC>^>OS|5c0bZ07`%JCpjqSNcVg>zcNE zA3Pi`$Ph~pc{iDE!0tq^d`UDZhY0S%_wMY&tfFYRI-NEI-5uyEaA!6=As8|D>5_c3 zd>ww?lHj-4?tfif#ou-Z_n(G)ZbA7T%$;&UU*u@Hx=ZNpnrAT%&GGG2hQ`Dgxy33_ z^{=iPX!wG6mp(u(Q`1~SGx}Er?2Z&BoJ{{V8`<_*wv*QJM#ZX~&7oac{83xyAG*UC zpSbGS%E)-piay%tveo-zr!7PRUUIw3Py5dj1y&>StA9#&G%{&LJ;Rl2c3%FCf5Ede z`-NWO;~u1WrX-+r!v&PiC}@dWT~*cM_Tnx*9;K6kekovgC(sY>2D(qSlhSdlYt`3l zL%Ra_*HQXJ-RbvaA4)gq z%j`rzk0DehfICKK6|`XPZr~nIy`+wG4bSLr2>7;BxZ(SQ;J*LU1Ag)k#(oP#-6v7^ z;)m-Q9B@~GI~jFf_qE0SdH3Zr{<9*QE&~0XH1!fM$-ZZ}#W!*Ful&z3x`c?jQwhzw z+f>6ZU*}9&SMD}9m@M@9Xoabozg|-kK&yLQQ^CWfnKJp7P`38 z`j@SaqfZ0(&Xn%*PoVrN)ID3V;W9I+sC!uDo9OohrT=MYlOXOvSD(opDhF<7urwz9 z$So#DPiSyUzDrM_mYs_tM_b*k^yL|Ta>ucqn7ih^yJ<1dB|4$Y6z#hAy8V0RSa%-+ z_a93!_k;#s(v}{S-iXn&4dW>N;xAf<=8tZNJ-v8m>Budkt*U=rm9E8Y4rs@aH5Yl(nmj8 zu|Ias${PO%$$hmzcla?6sZ=)i{M7JqlDKp7ci)W|{pGtC`gt~9xth1GaCNsc446AZ z+_HCF)kiIE^_JbE$j2tp@^JItqgS_odA1YPacmcM^Jdgt@Z0)eZ`WUk%cS7GhW4Ot z-L}3t3v00Qu)9MG>V8m?aETe!4&~P*`aX&N{8K1DK9A^rL(?P2u8w%YM1-3M`sylO z!xztizWvfU8aaAwv>_ll@>uH0^xc2Li~Gj!kz+>}N*~WaZ(b0!J{Vq{sZ1I#^zWnf z*R7-UqBq~k?XErJzs#Yb^i-v?Rk5Lw6+XA;U-UJJe&~yPnf;pcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.nto b/locidex/example/manifest_in/fails/fails_name/blast/nucleotide/nucleotide.nto new file mode 100644 index 0000000000000000000000000000000000000000..ad19396e81aff427697a109c3c035ac73cb27f3f GIT binary patch literal 
216 zcmXBFg${xM06;;oyHK$M6YTo`U-LHJc6+}dFSG(hN|dQkrAD0wO0 +KFRPGHADYTYHQKYGVRDYRGGGRSSARETAMRVAAGAIAKKYLQQEFGIEVRAYLSQMGDVAIDKVDWNEIENNDFFCPDVDKVAAFDELIRELKKEGDSIGAKIQVVATGVPVGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVRQKGSQHRDPLTPQG +>1 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKHGIVIQGCLTQMGDIPLEIKDWQQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVANGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>2 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGVEIRGCLTQMGDIPLEIKDWSQVELNPFFCPDPDKIEVLDELMRGLKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>3 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLKEKLGIEVRGYLSQLGPITCDLVDWSIVESNPFFCPDPSRLDALDEYMRALKKEGNSIGAKVTVVAQGVPAGFGEPVFDRLDADLAHALMSINAVKGIEIGDGFGVVTLKGTENRDEITKKG +>4 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGIEIRGCLTQMGDIPLEIKDWSQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKDG +>5 +VFRPGHADYTYEQKYGFRDYRGGGRSSARETAMRVAAGAIAKKYLQQKFGIVIRGCLSQMGDIPLAIKDWDQVELNPFFCADADKLDALDELMRGLKKEGDSIGAKVTVVADGVPAGWGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVKLRGSQNRDEITKAG +>6 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAQKFGVVIRGCLTQMGDIPLEIKDWDQVEQNPFFCPDPDKIEALDELMRALKKEGDSIGAKVTVVADSVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFGVVQLRGSQNRDEITTAG +>7 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>8 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>9 +MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>10 +MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQPLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>11 
+MEMVARVALVQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGERMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGQSLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>12 +MEMIARVTLTQPHDAGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGDSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>13 +MEMIARVALSLPHQAGATTVPARKFFDICRGLPEGAEIAVTLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCAMPVGQPLPNHSVIVPRKGVLELMRMLDGGDSPLRIQ +>14 +SALTENDLVFALSQHAVTFADAELQQQGKSWPSLPRYFAIGRTTALALHTVSGFNIHYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELIGETLTARGADVDFCECYQRSAKYYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>15 +AALGESDLLFALSQHAVAFAQSQLHQQDRKWPRLPTYFAIGRTTALALHTVSGQKILYPQDREISEVLLQLPELQNIAGKRALILRGNGGRELIGDTLTARGAEVTFCECYQRCAIHYDGAEEAMRWQSREVTTVVVTSGEMLQ +>16 +ATLTENDLVFALSQHAVAFAHAQLQRDGRNWPASPRYFAIGRTTALALHTVSGFDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRGRELLGETLTARGAEVSFCECYQRSAKHYDGAEEAMRWHTRGVTTLVVTSGEMLQ +>17 +AALTDNDLVFALSQHAVAFAHAQLQQQELDWPVQPRYFAIGRTTALALHTVNGCDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELLGKTLTERGAEVTFCECYQRSAKHYDGAEEAMRWHSRGVTTIVVTSGEMLQ +>18 +ETLGDNDLLFALSQHAVSFAHAQLQQQGLNWPSLPHYFAIGRTTALALHTVSGHKIRYPQDREISEVLLQLPELQSIAGKRALILRGNGGRELIGQTLTSRGADVTFCECYQRSAKHYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>19 +RLLQEGDLLFALSQHAVEFAHAQLQQHAVSWPHAPRYFAIGRTTALALHTASGIDVRYPLDREISEVLLQLPELQTIAGKRALILRGNGGRELLGETLRERGADVTFVECYQRCAKHYDGAEEAMRWHARGINTLVVTSGEMLQ +>20 +IAGCQKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADIARKVAEAVERQLAELPRAGTARQALSASRLIVTKDLAQCV +>21 +IAGCKKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGIAQNVAEAVERQLAELPRAETARQALSASRLIVTKDLAQCV +>22 +IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGMASRVAEAVERQLAALPRAETARQALSASRLIVTRSLAQCV +>23 
+IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGTESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPAADMARRVAEAVERQLAELPRAETARQALNASRLIVTKDLAQCV +>24 +IAGCQKVVLCSPPPIADEILYAAKLCGVQAIYKVGGAQAISALAFGTVSIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADMAKRVGDAVERQLADLPRAETARQALSASRLIVARDLDQCI +>25 +IAGCKKVVLCSPPPIADEILYAAQLCGVKEVFNVGGAQAIAALALGTESIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAKLAEGVAEAVERQLAELSRADTARQALSASRLIVAKDLAQCV +>26 +IAGCKKVVLCSPPPIADEILYAARLCGVQQVYQVGGAQAIAALAFGTETVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATTDFVASDLLSQAEHGPDSQVILLTPDSAMAQAVADAVERQLAELPRAETARQALAESRLIVARDLAQCV +>27 +SDWATMQFAAEIFDILDIPHHVEVVSAHRTPDKLFSFAENAEENGFQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLRD +>28 +SDWTTMQFAAEIFEILDVPHHVEVVSAHRTPDKLFSFAETAEENGYHVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLSN +>29 +SDWATMQFAAEILDILNVPHHVEVVSAHRTPDKLFSFAEDAESNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>30 +SDWATMQFAVEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKELHQRLNG +>31 +SDWATMQFAAEIFDILNVPHHVEVVSAHRTPDKLFSFAESAEEKGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLRQRLAD +>32 +SDWATMQFAAEIFEMLDVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKALHQRLSD +>33 +SDWATMSHAADVLDTLQIPYHVEIVSAHRTPDKLFSFAEKAKSNGFDVIIAGAGGAAHLPGMLAAKTLVPVFGVPVQSATLSGVDSLYSIVQMPKGIPVGTLAIGKAGAANAALLAAQVLALHSPAILDALTA +>34 +SDWATMQFAAEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYEVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLHQRLAE +>35 +SDWATMQFAAETAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>36 
+SDWATMQHAAEILDALDVPYHVEVVSAHRTPDKLFSFAESAQHNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDDALLARLAA +>37 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLINVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>38 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>39 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDFQTDGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>40 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>41 +RTFLEELTAAEGLERYLGAKFPGAKRFSLEGGDALVPMTKEMIRHAGASGMREVVIGMAHRGRLNMLVNVLGKKPQDLFDEFAGKHGEGWGTGDVKYHQGFSADFATPGGDVHLALAFNPSHLEIVNPVVMGSVRARQDRLGDEDGSKVLPITIHGDSAIAGQGVVA +>42 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>43 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDDDNLPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>44 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGANSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFATMSRAGISVVLITQSSSEYSISFCVPPKRC +>45 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGAASDDDALPVKGISHLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>46 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASVDEDELPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>47 
+VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>48 +VLGRNGSDYSAAVLAACLRAKCCEIWTDVDGVYTCDPRLVPDARLLKGMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPDAPGTLIGDGQKDESTPVKGITNLNNMAMINVSGPGMKGMVGMAARVFSVMSRAGISVVLITQSSSEYSISFCVPQKEL +>49 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDC +>50 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRSGISVVLITQSSSEYSISFCVPQADC +>51 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTISPIAQFQIPCLIKNTGNPQAPGTLIGASADEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRNGISVVLITQSSSEYSISFCVPQGDC +>52 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQADS diff --git a/locidex/example/manifest_in/fails/fails_name/config.json b/locidex/example/manifest_in/fails/fails_name/config.json new file mode 100644 index 0000000..bf16ecf --- /dev/null +++ b/locidex/example/manifest_in/fails/fails_name/config.json @@ -0,0 +1,12 @@ +{ + "db_name": "Locidex Database", + "db_version": "1.0.0", + "db_date": "04/04/2024", + "db_author": "test", + "db_desc": "", + "db_num_seqs": 53, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" +} \ No newline at end of file diff --git a/locidex/example/manifest_in/fails/fails_name/meta.json b/locidex/example/manifest_in/fails/fails_name/meta.json new file mode 100644 index 0000000..f3b88fa --- /dev/null +++ b/locidex/example/manifest_in/fails/fails_name/meta.json @@ -0,0 +1,1181 @@ +{ + "info": { + "num_seqs": 53, + "is_cds": "True", + "trans_table": 11, + "dna_min_len": 220, + "dna_max_len": 350, + "dna_min_ident": 80, + 
"aa_min_len": 73, + "aa_max_len": 116, + "aa_min_ident": 80 + }, + "meta": { + "0": { + "seq_id": 0, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 609, + "dna_seq_len": 501, + "dna_seq_hash": "4811bc98591c74954ace3cb487330482", + "aa_seq_len": 167, + "aa_seq_hash": "a8fbcf8179d8548f980b7b15f29de1d4", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "1": { + "seq_id": 1, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 614, + "dna_seq_len": 501, + "dna_seq_hash": "b66979eaf680fab872ffe1bde4c092d6", + "aa_seq_len": 167, + "aa_seq_hash": "3e034a4d80ac27352822774abd9319df", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "2": { + "seq_id": 2, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 618, + "dna_seq_len": 501, + "dna_seq_hash": "f02a36ff6df05f9bf38428fa22a035da", + "aa_seq_len": 167, + "aa_seq_hash": "e2d30bb18231528ef65c34880704dd7a", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "3": { + "seq_id": 3, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 619, + "dna_seq_len": 501, + "dna_seq_hash": "bee9d7360aa8e9b840fb29afa1de2c2e", + 
"aa_seq_len": 167, + "aa_seq_hash": "c3f71f5780b5f1031aaf21697a482ee3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "4": { + "seq_id": 4, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 620, + "dna_seq_len": 501, + "dna_seq_hash": "5b7956485455fdbc7c86d4834a8f7406", + "aa_seq_len": 167, + "aa_seq_hash": "60ce8f3b07f53378580ee528910ee623", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "5": { + "seq_id": 5, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 624, + "dna_seq_len": 501, + "dna_seq_hash": "98ba14aac74444a253123aff3d20c69f", + "aa_seq_len": 167, + "aa_seq_hash": "bab41702c7c209def93f9c9930c27086", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "6": { + "seq_id": 6, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 716, + "dna_seq_len": 501, + "dna_seq_hash": "6b9166d5d996897cae3cc288d7969d78", + "aa_seq_len": 167, + "aa_seq_hash": "5bc86c0a9226224922cbd6219c182622", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "7": { + "seq_id": 
7, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "d401763f2df6e5fe87e1e07d3c170fe6", + "aa_seq_len": 167, + "aa_seq_hash": "928ad814483bbffda3e3b3a0aa4ca072", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "8": { + "seq_id": 8, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 120, + "dna_seq_len": 501, + "dna_seq_hash": "9c50d73cc4ef8d0a447f07ad150ad8cc", + "aa_seq_len": 167, + "aa_seq_hash": "928ad814483bbffda3e3b3a0aa4ca072", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "9": { + "seq_id": 9, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 555, + "dna_seq_len": 501, + "dna_seq_hash": "fab4f658dfba0cd0174a4a87998cf948", + "aa_seq_len": 167, + "aa_seq_hash": "a081905e659429db1f40e145932ae277", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "10": { + "seq_id": 10, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 557, + "dna_seq_len": 501, + "dna_seq_hash": "acb2ed027124e2a54b7734cd538590f1", + "aa_seq_len": 167, + "aa_seq_hash": 
"970184ec5ccc9f02ee3c858d2687cc18", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "11": { + "seq_id": 11, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 558, + "dna_seq_len": 501, + "dna_seq_hash": "ad996a122298d55ab3d4b2ea7a4974b0", + "aa_seq_len": 167, + "aa_seq_hash": "945455021fffea9b793d16af630db961", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "12": { + "seq_id": 12, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 563, + "dna_seq_len": 501, + "dna_seq_hash": "815242e67f31f4e2968f7f0620565125", + "aa_seq_len": 167, + "aa_seq_hash": "1b117ca76a022ae63d6f7bfe2ead289e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "13": { + "seq_id": 13, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 633, + "dna_seq_len": 501, + "dna_seq_hash": "532742ae95c046241789d79e68e30b7a", + "aa_seq_len": 167, + "aa_seq_hash": "fff51d2396f3da88a775416b4c6d14b6", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "14": { + 
"seq_id": 14, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 316, + "dna_seq_len": 432, + "dna_seq_hash": "3922f6256f2891400db415013eb0b208", + "aa_seq_len": 144, + "aa_seq_hash": "0af9d546dfcaf93373a8919df3e30323", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "15": { + "seq_id": 15, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 343, + "dna_seq_len": 432, + "dna_seq_hash": "f76c13e33ad5b502dfe64181dbdf2378", + "aa_seq_len": 144, + "aa_seq_hash": "32484f065f9013aaa5b3c694cc99cdbf", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "16": { + "seq_id": 16, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 472, + "dna_seq_len": 438, + "dna_seq_hash": "80bea3abd165ee14e51bc9e9779fc6a1", + "aa_seq_len": 146, + "aa_seq_hash": "4e9cc2d289f1c946738cc8e6e4ef1186", + "dna_min_len": 306, + "dna_max_len": 744, + "aa_min_len": 102, + "aa_max_len": 248, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "17": { + "seq_id": 17, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 489, + "dna_seq_len": 432, + "dna_seq_hash": "83a314185d9ff0bf7c2953d30979e7eb", + "aa_seq_len": 144, + "aa_seq_hash": 
"5f9fc3707789543f2f14b0f1a555a05c", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "18": { + "seq_id": 18, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 497, + "dna_seq_len": 432, + "dna_seq_hash": "c70622b317de74bdaf57eb8bb5134537", + "aa_seq_len": 144, + "aa_seq_hash": "56b3d46d3e517eb7f83f089f9ed5ae2a", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "19": { + "seq_id": 19, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 498, + "dna_seq_len": 432, + "dna_seq_hash": "f284b11b34de688e2ef54c1b73936595", + "aa_seq_len": 144, + "aa_seq_hash": "da558cdebd900031d0df8f58ef01454e", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "20": { + "seq_id": 20, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "9f762c246c542c52c94c5022ca62311c", + "aa_seq_len": 167, + "aa_seq_hash": "447381a0d286fa1037b5499e2242819a", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "21": { + "seq_id": 21, + 
"locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 754, + "dna_seq_len": 501, + "dna_seq_hash": "65b434bea0d1939d2b748dbc5dd6df8b", + "aa_seq_len": 167, + "aa_seq_hash": "2b685aa7892794b69c9faa20c58a9183", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "22": { + "seq_id": 22, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 757, + "dna_seq_len": 501, + "dna_seq_hash": "eccfc35078428e44e5dd3e85d9ebf1fe", + "aa_seq_len": 167, + "aa_seq_hash": "35fa89ee4cd8689b89d553157471afe0", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "23": { + "seq_id": 23, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 759, + "dna_seq_len": 501, + "dna_seq_hash": "ce01d780cd0ffe3197f708d7048a473b", + "aa_seq_len": 167, + "aa_seq_hash": "bc0edd26ea6032cc4939e8cbc17a12d3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "24": { + "seq_id": 24, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 768, + "dna_seq_len": 501, + "dna_seq_hash": "23377e95fe00bf6a16b51fe8929a938a", + "aa_seq_len": 167, + "aa_seq_hash": "9fb34628ef67396ed38c755280e04f7e", + 
"dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "25": { + "seq_id": 25, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 838, + "dna_seq_len": 501, + "dna_seq_hash": "8478cdd016753651cd73afc4ad20c7df", + "aa_seq_len": 167, + "aa_seq_hash": "6512669779521a6792ecdae3088467f7", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "26": { + "seq_id": 26, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 907, + "dna_seq_len": 501, + "dna_seq_hash": "ab935d39fffeff601d95a8362ba454f3", + "aa_seq_len": 167, + "aa_seq_hash": "1c277aef51e883e29ee8b489c525ea1b", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "27": { + "seq_id": 27, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 24, + "dna_seq_len": 399, + "dna_seq_hash": "a7af783dc7084f1b8bc593aa29f80003", + "aa_seq_len": 133, + "aa_seq_hash": "46a0c532edb92303b1b9d12a80056a60", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "28": { + "seq_id": 28, + "locus_name": 
"purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 48, + "dna_seq_len": 399, + "dna_seq_hash": "9fb313e6232b0d0e14d2fc4be7c409f7", + "aa_seq_len": 133, + "aa_seq_hash": "0e56efdd1f7fbaf132524616e29d98ca", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "29": { + "seq_id": 29, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 317, + "dna_seq_len": 399, + "dna_seq_hash": "50cd750e2f6860dd489040f1d5f64f9b", + "aa_seq_len": 133, + "aa_seq_hash": "18e887a66ce56a930dbf8db48b406596", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "30": { + "seq_id": 30, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 487, + "dna_seq_len": 399, + "dna_seq_hash": "0e1384e36f3897f65690f9230d2bcd73", + "aa_seq_len": 133, + "aa_seq_hash": "20c9a488aa6542257a151ced866d2f8f", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "31": { + "seq_id": 31, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 608, + "dna_seq_len": 399, + "dna_seq_hash": 
"e180fd1852382c132851674a9e379c03", + "aa_seq_len": 133, + "aa_seq_hash": "c7da76b50946241fe125348a19a9b6a3", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "32": { + "seq_id": 32, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 611, + "dna_seq_len": 399, + "dna_seq_hash": "0ec842f985e93041c928ab7bb137295d", + "aa_seq_len": 133, + "aa_seq_hash": "be3990f2abaa8780b14e62d4fc8cd82a", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "33": { + "seq_id": 33, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 612, + "dna_seq_len": 399, + "dna_seq_hash": "9d42e484ea2936f87312f07abf0ad84a", + "aa_seq_len": 133, + "aa_seq_hash": "7af624e3930c7a5ab7785b08d925081c", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "34": { + "seq_id": 34, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 619, + "dna_seq_len": 399, + "dna_seq_hash": "02949c6f858f3cc5de1b13c9f5a40705", + "aa_seq_len": 133, + "aa_seq_hash": "52d120d4090a22e450633e01e4ccb729", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + 
"dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "35": { + "seq_id": 35, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 631, + "dna_seq_len": 315, + "dna_seq_hash": "c4715d7df9a9eebfe5a334dd55ee469b", + "aa_seq_len": 105, + "aa_seq_hash": "31aa38918b303bf67374188e11413e59", + "dna_min_len": 220, + "dna_max_len": 535, + "aa_min_len": 73, + "aa_max_len": 178, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "36": { + "seq_id": 36, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 724, + "dna_seq_len": 399, + "dna_seq_hash": "782d08e7ee8a031a1402020e708bfbbc", + "aa_seq_len": 133, + "aa_seq_hash": "b5f9063808b8be839e7f169bf73c88e4", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "37": { + "seq_id": 37, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "481b6454f33fae7875b4978c14094ec3", + "aa_seq_len": 167, + "aa_seq_hash": "fa04457773c66ae015014e915af2516d", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "38": { + "seq_id": 38, + "locus_name": "sucA", + "locus_name_alt": 
"SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 30, + "dna_seq_len": 501, + "dna_seq_hash": "79048d21794195277a6af839be13e6e1", + "aa_seq_len": 167, + "aa_seq_hash": "186c53cb5c2bf0b7ecac853c6067065d", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "39": { + "seq_id": 39, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 281, + "dna_seq_len": 501, + "dna_seq_hash": "f10d273aa97d5556a43b96721d666975", + "aa_seq_len": 167, + "aa_seq_hash": "4172d5e8c8265884fe5479e10527cb02", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "40": { + "seq_id": 40, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 399, + "dna_seq_len": 501, + "dna_seq_hash": "1839775cc7c29412648ec7b004e1a417", + "aa_seq_len": 167, + "aa_seq_hash": "c4cfbbf5c5814829188f4f404f312bd3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "41": { + "seq_id": 41, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 571, + "dna_seq_len": 501, + "dna_seq_hash": "fce3e68952108e415579b3ad24a3f150", + "aa_seq_len": 167, + "aa_seq_hash": 
"43372b6526524f5ed4542be83b5b8614", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "42": { + "seq_id": 42, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 686, + "dna_seq_len": 501, + "dna_seq_hash": "629ea0cbfe0d2e9f34b1ca034a6c55fd", + "aa_seq_len": 167, + "aa_seq_hash": "c4cfbbf5c5814829188f4f404f312bd3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "43": { + "seq_id": 43, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "eaec644b411bd0b3ab1e086fbabd29c9", + "aa_seq_len": 167, + "aa_seq_hash": "bfe756f2f421db752907a171f3a44d69", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "44": { + "seq_id": 44, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 31, + "dna_seq_len": 501, + "dna_seq_hash": "97e4acce4e840b1c48de51f55fccf620", + "aa_seq_len": 167, + "aa_seq_hash": "be9296cb1ea9443fb43c0f967d107988", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "locus_type": "mlst" + }, + "45": { + "seq_id": 45, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 208, + "dna_seq_len": 501, + "dna_seq_hash": "fbc6cb34cddfb1fe6a7806d5f7613259", + "aa_seq_len": 167, + "aa_seq_hash": "b788ec581475c9ba71d997b2db6e1def", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "46": { + "seq_id": 46, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 630, + "dna_seq_len": 501, + "dna_seq_hash": "ce58c0cacd4e8d9fa4867d11f2add864", + "aa_seq_len": 167, + "aa_seq_hash": "c062c5c88bdebdf2883e06fe6823c71c", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "47": { + "seq_id": 47, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 631, + "dna_seq_len": 501, + "dna_seq_hash": "949426df5430f94547459d06c786d77b", + "aa_seq_len": 167, + "aa_seq_hash": "dac50e2b5df83fe87c9826ecf99d568e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "48": { + "seq_id": 48, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", 
+ "locus_description": NaN, + "locus_uid": 632, + "dna_seq_len": 501, + "dna_seq_hash": "9a187a6b3e4675fe12ea213c7a23577c", + "aa_seq_len": 167, + "aa_seq_hash": "6536824faaa7880cfb44a6cd1ed057c9", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "49": { + "seq_id": 49, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 633, + "dna_seq_len": 501, + "dna_seq_hash": "7be8b9732228c1f82630b547d7011a5e", + "aa_seq_len": 167, + "aa_seq_hash": "1eac2cb94b8f619df1c9b0f3369f4a96", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "50": { + "seq_id": 50, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 637, + "dna_seq_len": 501, + "dna_seq_hash": "1895acdf991b49a885873fe82ce9ca85", + "aa_seq_len": 167, + "aa_seq_hash": "9fe9521d0bf495570a0fd425c0e48764", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "51": { + "seq_id": 51, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 638, + "dna_seq_len": 501, + "dna_seq_hash": "9776bbec78b5214d3dfca0d32b395d4b", + "aa_seq_len": 167, + "aa_seq_hash": "2914d167cc3579348e36d16afc628a39", + 
"dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "52": { + "seq_id": 52, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 748, + "dna_seq_len": 501, + "dna_seq_hash": "6cf9d69644c819d9ecd3a0fd090977fc", + "aa_seq_len": 167, + "aa_seq_hash": "cf0168a601a4f5792c7326a2da650edb", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + } + } +} \ No newline at end of file diff --git a/locidex/example/manifest_in/fails/fails_name/results.json b/locidex/example/manifest_in/fails/fails_name/results.json new file mode 100644 index 0000000..5252454 --- /dev/null +++ b/locidex/example/manifest_in/fails/fails_name/results.json @@ -0,0 +1,14 @@ +{ + "analysis_start_time": "2024-04-04 14:12:12", + "parameters": { + "input_file": "locidex/example/build_db_mlst_in/senterica.mlst.txt", + "outdir": "/tmp/pytest-of-mwells/pytest-82/build0", + "name": "Locidex Database", + "db_ver": "1.0.0", + "db_desc": "", + "author": "", + "date": "", + "force": true + }, + "analysis_end_time": "2024-04-04 14:12:12" +} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/manifest.json b/locidex/example/manifest_in/passes/manifest.json new file mode 100644 index 0000000..0bb50a4 --- /dev/null +++ b/locidex/example/manifest_in/passes/manifest.json @@ -0,0 +1,8 @@ +{ + "Locidex Database": { + "1.0.0": { + "db_relative_path_dir": "pass_one_db", + "db_relative_path_config": "pass_one_db/config.json" + } + } +} \ No newline at end of file diff --git 
a/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.fasta b/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.fasta new file mode 100644 index 0000000..a03cb89 --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.fasta @@ -0,0 +1,106 @@ +>0 +AAATTCCGTCCCGGACATGCGGACTACACCTATCACCAAAAATACGGTGTGCGAGATTACCGTGGCGGCGGCCGTTCATCGGCACGTGAAACCGCCATGCGTGTTGCTGCGGGAGCGATTGCCAAAAAATATCTGCAGCAAGAGTTTGGCATTGAAGTGCGTGCTTACTTGTCGCAAATGGGGGATGTCGCGATTGATAAAGTGGATTGGAATGAGATTGAAAACAACGATTTCTTCTGTCCTGATGTCGATAAAGTGGCTGCGTTTGACGAGCTGATCCGCGAGCTGAAAAAAGAAGGCGATTCGATCGGCGCGAAAATCCAAGTGGTCGCTACAGGCGTGCCGGTTGGACTGGGTGAGCCTGTGTTTGATCGCTTAGATGCGGATATTGCCCATGCCTTGATGAGCATCAACGCCGTGAAAGGAGTCGAGATTGGTGATGGCTTTGATGTGGTGCGCCAAAAAGGCAGCCAACACCGTGACCCGCTCACTCCACAAGGT +>1 +GTTTTCCGCCCGGGCCATGCCGACTATACCTACGAGCAGAAATACGGTCTGCGCGATTACCGTGGCGGCGGTCGTTCTTCCGCCCGTGAAACGGCGATGCGCGTCGCGGCTGGCGCGATTGCTAAAAAATATCTGGCGGAGAAACACGGCATCGTCATTCAGGGGTGTCTGACCCAGATGGGCGATATTCCGCTTGAAATCAAAGACTGGCAGCAGGTTGAACAAAACCCGTTTTTCTGTCCTGATCCAGATAAAATCGACGCGCTGGATGAACTGATGCGCGCCCTGAAGAAAGAGGGCGATTCGATTGGGGCAAAAGTGACCGTCGTGGCAAACGGCGTTCCGGCCGGGCTTGGCGAACCGGTCTTTGACCGTCTGGATGCGGACATCGCTCATGCGCTGATGAGCATCAACGCGGTAAAAGGCGTGGAGATTGGCGATGGGTTTGATGTGGTCGCGTTGCGAGGCAGCCAGAATCGCGATGAAATTACCAAAGAGGGC +>2 +GTTTTCCGTCCAGGACACGCTGACTATACCTATGAGCAGAAATATGGCCTGCGCGACTACCGTGGCGGCGGACGTTCATCCGCGCGTGAAACGGCGATGCGCGTTGCGGCTGGCGCGATTGCCAAAAAATATCTGGCGGAAAAATTCGGCGTTGAAATTCGCGGCTGTCTGACGCAGATGGGGGATATTCCGCTGGAGATCAAAGACTGGTCTCAGGTGGAGCTTAACCCGTTCTTTTGTCCAGACCCGGATAAAATCGAAGTGCTGGACGAACTGATGCGCGGGCTGAAGAAAGAGGGCGACTCCATCGGGGCAAAAGTGACCGTTGTTGCAAGCGGCGTACCGGCGGGTCTCGGCGAACCTGTATTCGACCGTCTGGATGCCGACATCGCCCATGCGCTGATGAGCATTAACGCCGTTAAGGGCGTTGAGATTGGCGACGGTTTTGACGTTGTTGCGCTGCGCGGCAGTCAGAACCGCGATGAGATCACCAAAGAAGGT +>3 
+GTTTTCCGCCCAGGGCATGCTGATTATACCTATGAACAAAAATATGGTTTGCGTGATTATCGTGGTGGTGGACGTTCTTCTGCTCGTGAAACGGCAATGCGTGTCGCCGCAGGTGCGATTGCTAAAAAATATCTAAAAGAGAAATTAGGCATCGAAGTTCGAGGATATCTTTCTCAGCTAGGACCTATTACATGTGATCTTGTTGATTGGTCTATTGTTGAAAGCAATCCATTTTTCTGTCCTGATCCTTCACGTTTAGATGCGCTTGATGAATACATGCGTGCACTTAAAAAAGAAGGTAATTCTATTGGTGCAAAAGTCACTGTGGTTGCACAGGGTGTACCTGCTGGATTTGGTGAACCTGTCTTTGATCGATTAGATGCTGATTTAGCGCATGCTTTGATGAGTATCAATGCTGTCAAAGGTATAGAAATTGGTGATGGATTTGGTGTTGTAACATTAAAAGGTACAGAAAACCGAGATGAAATCACTAAAAAGGGA +>4 +GTTTTCCGTCCAGGCCATGCCGATTACACCTACGAACAAAAATACGGTCTGCGCGATTATCGCGGCGGCGGGCGCTCTTCCGCCCGCGAAACCGCCATGCGCGTGGCGGCAGGGGCGATTGCAAAAAAATATCTCGCCGAGAAATTTGGCATTGAGATTCGCGGCTGCCTGACCCAGATGGGTGACATTCCGCTGGAAATCAAAGACTGGTCGCAGGTCGAGCAAAATCCGTTTTTCTGCCCGGACCCGGACAAAATCGACGCGTTAGATGAACTGATGCGCGCGCTGAAAAAAGAGGGCGACTCCATCGGCGCGAAAGTCACCGTTGTTGCCAGTGGCGTCCCCGCCGGACTTGGCGAGCCGGTCTTTGACCGCCTGGATGCCGACATCGCCCATGCGCTGATGAGCATCAACGCGGTGAAAGGCGTAGAAATTGGTGATGGTTTTGACGTGGTGGCGCTGCGTGGCAGCCAGAACCGCGACGAAATCACCAAAGACGGT +>5 +GTTTTCCGTCCTGGTCACGCCGACTATACCTACGAACAAAAATATGGCTTTCGCGACTATCGCGGCGGCGGGCGTTCTTCCGCGCGTGAAACCGCGATGCGCGTGGCGGCAGGGGCAATTGCCAAAAAATATCTCCAGCAGAAATTCGGCATCGTTATCCGCGGCTGTCTGTCCCAGATGGGCGACATTCCGCTGGCAATCAAAGACTGGGATCAGGTAGAGCTCAACCCGTTCTTCTGCGCCGATGCCGACAAGCTGGACGCGCTGGATGAGCTGATGCGTGGCCTGAAAAAAGAGGGCGACTCCATTGGTGCGAAAGTCACCGTGGTGGCCGACGGCGTGCCGGCTGGCTGGGGCGAGCCGGTATTTGACCGCCTTGACGCCGACATCGCCCACGCGCTGATGAGCATCAACGCGGTGAAAGGCGTCGAAATCGGCGACGGTTTTGACGTGGTCAAGCTTCGCGGCAGCCAGAACCGCGACGAAATCACGAAGGCGGGT +>6 
+GTGTTCCGTCCGGGGCACGCGGATTACACCTACGAACAAAAATACGGCCTGCGCGACTATCGCGGCGGCGGGCGTTCATCCGCCCGTGAAACCGCCATGCGCGTCGCGGCAGGCGCTATCGCCAAAAAATATCTGGCGCAGAAATTCGGCGTGGTGATTCGCGGCTGCCTGACCCAGATGGGTGATATTCCGCTGGAAATCAAAGACTGGGATCAGGTAGAGCAAAACCCGTTCTTCTGCCCGGACCCGGATAAAATCGAGGCGCTGGATGAGCTGATGCGCGCTCTGAAAAAAGAGGGCGATTCCATCGGCGCGAAAGTCACCGTGGTGGCCGACAGCGTGCCCGCCGGGCTTGGCGAGCCGGTATTTGACCGCCTGGACGCCGATATCGCCCACGCGCTGATGAGCATTAACGCCGTGAAGGGCGTGGAAATCGGCGACGGTTTCGGCGTGGTGCAACTGCGCGGCAGCCAGAACCGCGACGAAATCACCACTGCCGGT +>7 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAGCCAGGCGCCACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGCGGCCTGCCGGAGGGCGCGGAGATTGCCGTTCAGTTGGAAGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGCCGCTTCTCGCTGTCTACGCTGCCTGCCGCCGATTTCCCGAATCTTGACGACTGGCAAAGCGAAGTTGAATTTACGCTGCCGCAGGCCACGATGAAGCGCCTGATTGAAGCGACCCAGTTTTCGATGGCCCATCAGGATGTGCGCTACTACTTAAACGGTATGCTGTTTGAAACGGAAGGTAGCGAACTGCGCACTGTTGCGACCGACGGCCACCGTCTGGCGGTGTGCTCAATGCCGCTGGAGGCGTCTTTACCTAGCCACTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGTGGCGAAAACCCGCTGCGCGTGCAG +>8 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAACCCGGCGCTACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCCTGCCGGAAGGGGCGGAAATCGCCGTTCAGCTGGAGGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGTCGCTTTTCGCTGTCTACCTTACCGGCAGCAGACTTCCCGAATCTGGATGACTGGCAAAGCGAAGTGGAATTCACGCTGCCTCAGGCGACGATGAAACGCTTGATTGAGGCCACCCAGTTTTCGATGGCCCATCAGGACGTGCGCTACTACCTGAACGGTATGTTGTTTGAAACGGAAGGAAGCGAACTGCGCACCGTCGCGACCGACGGCCACCGTCTGGCGGTCTGTTCAATGCCGCTGGAGGCCTCTTTACCGAGCCATTCAGTGATCGTACCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTTGACGGCGGTGAAAATCCACTGCGTGTACAG +>9 
+ATGGAAATGGTGGCGCGCGTTGCGTTGATTCAGCCTCATGAACCAGGCGCAACTACCGTCCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCTTGCCGGAAGGGGCTGAAATTGCCGTCCAGCTGGAAGGCGATCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTCTCGCTTTCCACGCTGCCTGCCGCCGATTTCCCTAATCTGGATGACTGGCAGAGCGAAGTCGAATTCACCCTGCCGCAGGCAACGATGAAGCGCCTGATTGAAGCCACCCAGTTCTCAATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAGACTGAAGGTGAAGAGTTGCGTACCGTCGCGACCGACGGTCACCGTCTGGCGGTCTGCTCTATGCCGGTCGGGCAATCTCTGCCTAACCATTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAGCTGATGCGTATGCTCGACGGCGGCGAAACCCCGCTGCGCGTACAG +>10 +ATGGAGATGGTGGCGCGCGTGGCGCTGATCCAGCCTCATGAACCTGGTGCGACCACCGTTCCGGCGCGTAAATTCTTCGATATTTGCCGTGGATTACCAGAAGGGGCGGAAATTGCCGTTCAACTGGAAGGCGACCGTATGCTGGTGCGTTCTGGCCGCAGCCGTTTCTCGCTGTCTACGCTGCCTGCCGCCGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTCGAATTCACCCTGCCACAGGCGACAATGAAGCGCCTGATTGAAGCCACGCAGTTTTCGATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAAACCGAAGGGGAAGAGTTGCGTACCGTGGCGACCGACGGTCACCGCCTGGCGGTCTGTTCAATGCCTGTCGGTCAGCCGTTGCCTAGCCATTCGGTGATCGTACCGCGTAAAGGTGTGATTGAACTGATGCGTATGCTCGACGGCGGCGATAACCCGCTGCGCGTGCAG +>11 +ATGGAAATGGTGGCACGCGTTGCGCTGGTTCAGCCGCACGAACCAGGGGCGACGACCGTTCCAGCGCGCAAATTCTTTGATATCTGCCGTGGTCTGCCTGAAGGCGCGGAAATTGCCGTGCAGCTGGAAGGTGAGCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTTTCGCTGTCTACCCTGCCAGCGGCGGATTTCCCGAATCTCGATGACTGGCAGAGCGAAGTCGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCGACCCAGTTTTCTATGGCGCATCAGGACGTTCGCTATTACTTAAACGGTATGCTGTTTGAAACCGAAGGTGAAGAACTGCGCACCGTGGCGACCGACGGCCACCGTCTGGCAGTCTGTTCAATGCCAATTGGTCAATCTTTGCCAAGCCATTCGGTGATCGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGCGGCGACAATCCGCTGCGCGTGCAG +>12 
+ATGGAAATGATCGCGCGCGTTACGCTGACTCAGCCGCACGACGCGGGCGCGACCACGGTTCCGGCACGTAAATTCTTTGATATTTGCCGTGGGCTGCCGGAAGGCGCTGAAATCGCAGTGCAGCTGGAGGGCGACCGCATGCTGGTGCGCTCTGGCCGCAGCCGTTTCTCCCTCTCCACGTTGCCCGCTGCGGACTTCCCGAACCTGGATGACTGGCAGAGCGAAGTTGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCCACGCAGTTCTCCATGGCGCATCAGGACGTTCGTTACTACTTAAACGGCATGCTGTTCGAAACCGAAGGTGAAGAGCTGCGTACCGTGGCGACCGACGGTCACCGTCTGGCGGTTTGTTCCATGCCGATTGGCGATTCACTGCCAAACCATTCGGTGATCGTACCGCGTAAAGGCGTAATTGAACTGATGCGTATGCTCGACGGCGGTGAAACGCCGCTGCGCGTGCAG +>13 +ATGGAGATGATCGCGCGTGTGGCGCTGTCGCTACCGCACCAGGCGGGCGCGACCACCGTGCCGGCGCGCAAATTCTTCGATATCTGCCGTGGCTTGCCGGAAGGGGCGGAAATCGCCGTTACGCTGGAAGGCGACAGAATGCTGGTGCGCTCCGGGCGCAGCCGCTTCTCGCTGTCTACGTTACCGGCGGCAGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTGGAGTTCACGCTCCCGCAGGCCACCATGAAGCGCCTGATCGAAGCGACCCAGTTCTCCATGGCCCATCAGGACGTGCGGTATTACCTGAACGGGATGCTGTTTGAAACCGAAGGCGAAGAGCTGCGCACCGTGGCGACTGACGGCCACCGTCTGGCGGTATGCGCGATGCCGGTAGGCCAACCGCTGCCAAACCATTCGGTGATTGTACCGCGTAAAGGCGTGCTGGAGCTGATGCGTATGCTCGATGGCGGCGACAGCCCGCTGCGCATTCAG +>14 +TCGGCGCTGACGGAAAACGATCTGGTCTTCGCCCTCTCGCAGCACGCCGTCACCTTTGCAGATGCCGAGCTTCAGCAACAAGGGAAAAGCTGGCCCTCCCTTCCGCGTTATTTTGCCATTGGTCGCACAACGGCGCTGGCGCTGCATACCGTTAGCGGTTTCAATATTCACTACCCTCTGGATCGGGAAATTAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGCGCGCTTATATTACGCGGCAATGGTGGCCGTGAGCTGATAGGTGAAACCCTGACAGCACGCGGAGCTGATGTCGATTTTTGTGAATGTTATCAACGCAGTGCAAAATATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCTCGTGGTGTGACCACGGTGGTTGTCACCAGCGGAGAGATGCTACAA +>15 +GCGGCGCTGGGGGAGAGCGATCTGTTGTTTGCCCTCTCGCAACACGCGGTTGCTTTTGCCCAATCACAGCTGCATCAGCAAGATCGTAAATGGCCCCGACTACCTACTTATTTCGCCATTGGACGCACCACCGCACTGGCGCTACATACCGTAAGCGGACAGAAGATTCTCTACCCGCAGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGTGCGCTGATATTACGTGGCAATGGCGGTCGTGAGCTAATTGGGGATACCCTGACGGCGCGCGGTGCTGAGGTCACTTTTTGTGAATGTTATCAACGATGCGCAATCCATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCCCGCGAGGTGACGACGGTCGTTGTTACCAGCGGTGAAATGTTGCAG +>16 
+GCGACGTTGACGGAAAACGATCTGGTTTTTGCCCTTTCACAGCACGCCGTCGCCTTTGCCCACGCCCAACTCCAGCGAGATGGTCGAAACTGGCCTGCGTCGCCGCGCTATTTCGCGATTGGTCGCACCACGGCGCTCGCCCTTCATACCGTTAGCGGGTTCGATATTCGTTATCCATTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGCGCGCTGATTTTGCGTGGCAATGGCGGTCGCGGTCGCGAACTGCTGGGCGAAACCCTGACAGCTCGCGGAGCCGAAGTCAGTTTTTGTGAATGTTATCAACGAAGTGCGAAACATTACGATGGCGCAGAAGAGGCGATGCGCTGGCACACTCGCGGCGTAACGACGCTTGTTGTCACCAGCGGCGAGATGTTGCAA +>17 +GCGGCGCTCACGGACAACGATCTGGTGTTCGCCCTCTCGCAACACGCCGTCGCCTTTGCCCACGCCCAACTGCAACAGCAGGAGCTGGACTGGCCTGTGCAACCACGCTACTTCGCCATCGGGCGCACAACGGCGCTGGCGCTGCATACCGTTAACGGATGCGATATTCGCTATCCTCTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGAGCGCTTATTTTACGGGGCAACGGCGGGCGTGAACTGTTAGGCAAAACCCTCACAGAACGCGGCGCTGAAGTCACCTTTTGTGAATGTTATCAACGCAGTGCAAAACATTACGATGGCGCGGAAGAGGCGATGCGCTGGCACTCTCGCGGCGTGACGACGATTGTTGTCACCAGCGGCGAAATGCTGCAA +>18 +GAAACACTTGGCGATAACGATCTGCTCTTTGCACTTTCTCAACATGCAGTGTCATTCGCCCATGCGCAGTTGCAACAGCAGGGGCTAAACTGGCCATCACTTCCGCATTATTTCGCTATTGGCCGTACTACCGCTCTCGCCCTGCACACCGTAAGCGGACATAAGATTCGCTATCCACAAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCGGAATTACAAAGTATTGCGGGAAAACGCGCACTTATTTTGCGCGGTAACGGCGGCCGTGAATTGATCGGTCAGACGCTGACATCACGTGGTGCCGACGTTACTTTTTGTGAATGTTATCAACGCAGTGCGAAGCATTACGATGGTGCGGAAGAAGCTATGCGCTGGCAGTCTCGCGGCGTAACAACCGTCGTTGTAACCAGCGGTGAAATGCTGCAA +>19 +CGTCTCTTGCAGGAAGGCGATCTGCTCTTTGCGCTGTCGCAGCATGCCGTGGAGTTTGCCCATGCGCAGCTGCAACAGCATGCCGTTAGCTGGCCTCACGCCCCCCGCTATTTCGCCATCGGGCGCACCACGGCGCTGGCCTTACATACCGCGAGCGGAATCGATGTTCGTTACCCGTTAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAACCATTGCCGGAAAGCGCGCGCTCATTTTGCGCGGCAACGGTGGCCGCGAACTGCTGGGCGAAACGCTGCGCGAACGCGGCGCAGACGTGACGTTTGTGGAGTGCTATCAGCGCTGTGCGAAACACTATGATGGCGCGGAAGAAGCAATGCGCTGGCACGCCCGCGGTATTAATACGCTGGTGGTCACCAGCGGTGAAATGTTACAA +>20 
+ATTGCGGGATGCCAGAAGGTGGTTCTGTGCTCGCCGCCACCCATCGCTGATGAAATCCTCTATGCGGCGCAACTGTGTGGCGTGCAGGAAATCTTTAACGTCGGCGGCGCGCAGGCGATTGCCGCTCTGGCCTTCGGCAGCGAGTCCGTACCGAAAGTGGATAAAATTTTTGGCCCCGGCAACGCCTTTGTAACCGAAGCCAAGCGTCAGGTCAGCCAGCGTCTCGACGGCGCGGCTATCGATATGCCAGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCAACACCGGATTTCGTCGCTTCTGACCTGCTCTCCCAGGCTGAGCACGGCCCGGATTCCCAGGTGATCCTGCTGACGCCGGATGCTGACATTGCCCGCAAGGTGGCGGAGGCGGTAGAACGTCAACTGGCGGAACTGCCGCGCGCGGGCACCGCCCGGCAGGCCCTGAGCGCCAGTCGTCTGATTGTGACCAAAGATTTAGCGCAGTGCGTC +>21 +ATTGCCGGATGCAAAAAAGTGGTGTTGTGCTCGCCACCGCCTATCGCGGATGAAATCCTTTACGCTGCGCAGCTGTGCGGCGTGCAGGAAATCTTCAACGTCGGCGGCGCCCAGGCCATTGCCGCTCTGGCGTTCGGCAGCGAATCCGTGCCAAAAGTGGACAAAATTTTTGGCCCCGGCAACGCGTTTGTCACCGAGGCGAAACGCCAGGTCAGCCAGCGTCTCGACGGCGCGGCAATTGATATGCCTGCCGGCCCTTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCCACGCCAGATTTCGTGGCGTCTGACCTGCTCTCTCAGGCGGAACACGGCCCGGATTCTCAGGTCATCCTGCTGACCCCGGATGCCGGTATTGCGCAGAACGTCGCAGAGGCCGTCGAACGCCAGTTAGCGGAGTTACCGCGTGCAGAAACGGCGCGTCAGGCATTAAGCGCCAGCCGTCTGATCGTGACGAAAGACTTAGCCCAGTGCGTC +>22 +ATTGCAGGCTGTAAAAAAGTGGTGTTGTGCTCTCCCCCACCTATCGCCGATGAAATTCTGTATGCTGCGCAGCTCTGCGGCGTACAGGATGTGTTTAACGTTGGGGGCGCACAAGCTATTGCCGCGCTGGCATTTGGCAGTGAATCCGTGCCGAAAGTGGACAAAATTTTTGGCCCCGGTAATGCCTTTGTGACCGAAGCCAAACGTCAGGTGAGTCAGCGTCTGGACGGCGCCGCCATCGATATGCCAGCAGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCCACGCCGGATTTCGTTGCCTCTGACTTACTCTCGCAGGCCGAACACGGCCCCGATTCCCAAGTGATCCTGCTGACGCCGGATGCCGGTATGGCCAGCCGGGTTGCTGAAGCAGTAGAACGCCAGCTTGCAGCGCTGCCACGCGCTGAAACCGCGCGGCAGGCGTTAAGCGCCAGTCGTCTGATTGTCACCCGCTCCCTTGCGCAATGCGTA +>23 
+ATTGCGGGCTGTAAAAAAGTGGTGCTGTGCTCACCGCCGCCGATTGCCGATGAGATCCTTTACGCGGCGCAGCTGTGCGGTGTGCAGGACGTGTTTAACGTCGGCGGCGCACAGGCCATTGCCGCGCTGGCGTTTGGTACAGAATCCGTGCCGAAAGTGGACAAAATCTTCGGGCCAGGTAACGCCTTTGTCACCGAGGCAAAACGTCAGGTGAGCCAGCGTCTGGACGGTGCGGCGATCGATATGCCCGCAGGCCCGTCGGAAGTGCTGGTGATTGCTGACAGCGGCGCAACGCCGGATTTCGTGGCTTCTGATTTGCTCTCCCAGGCTGAACACGGCCCGGACTCTCAGGTGATTTTACTGACGCCCGCTGCTGATATGGCGCGTCGCGTAGCCGAAGCTGTCGAACGCCAGCTGGCAGAACTGCCGCGAGCTGAAACCGCCCGCCAGGCACTGAACGCCAGCCGCCTGATCGTGACTAAAGATTTAGCGCAGTGCGTG +>24 +ATTGCCGGTTGTCAGAAGGTGGTGCTCTGCTCTCCTCCACCGATCGCCGATGAGATCCTGTACGCGGCGAAGCTGTGCGGCGTGCAGGCGATCTATAAAGTGGGCGGTGCGCAGGCGATTTCTGCCCTGGCGTTCGGAACAGTATCCATTCCTAAGGTCGACAAAATCTTTGGCCCGGGCAATGCCTACGTGACCGAGGCGAAGCGCCAGGTCAGCCAGCGTCTGGACGGCGCGGCGATTGATATGCCTGCCGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCTACACCGGATTTCGTGGCCTCTGACCTGCTCTCGCAGGCCGAGCACGGCCCTGACTCGCAGGTGATTTTACTGACGCCAGATGCCGACATGGCAAAACGCGTGGGCGACGCCGTTGAGCGTCAGCTGGCTGACCTGCCGCGGGCGGAAACGGCGCGTCAGGCGCTATCCGCCAGCCGCCTGATTGTGGCCCGCGATCTTGACCAGTGCATC +>25 +ATCGCCGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTACGCCGCGCAACTCTGTGGCGTGAAAGAAGTGTTTAACGTGGGTGGCGCACAGGCCATTGCCGCGCTGGCGCTGGGCACGGAGTCTATTCCAAAAGTCGATAAAATCTTTGGGCCGGGCAACGCCTATGTGACCGAAGCCAAGCGCCAGGTCAGCCAGCGTCTTGACGGCGCGGCAATCGATATGCCCGCCGGACCGTCCGAAGTATTGGTTATCGCCGACAGCGGCGCAACGCCGGATTTTGTCGCCTCCGACCTGCTTTCTCAGGCCGAGCACGGCCCAGACTCGCAGGTGATCCTGCTGACGCCGGACGCTAAGCTTGCCGAGGGCGTGGCCGAAGCCGTTGAACGCCAGCTCGCCGAGCTGTCCCGCGCCGACACCGCGCGTCAGGCGCTCTCCGCCAGCCGTTTAATCGTAGCGAAAGATCTGGCGCAGTGCGTG +>26 
+ATCGCGGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTATGCGGCGCGTTTGTGCGGGGTACAGCAGGTCTATCAGGTGGGCGGCGCTCAGGCCATCGCGGCGCTGGCGTTTGGCACCGAGACCGTACCCAAAGTGGACAAAATCTTCGGGCCGGGCAATGCGTTTGTCACCGAAGCCAAACGTCAGGTCAGCCAGCGGCTGGATGGCGCGGCGATTGATATGCCTGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGATAGCGGCGCGACCACGGATTTCGTGGCCTCGGATTTGCTGTCCCAGGCGGAACACGGCCCGGATTCGCAGGTGATCCTGCTGACACCGGACAGCGCCATGGCGCAGGCGGTGGCCGACGCGGTTGAGCGTCAACTCGCCGAACTGCCGCGCGCGGAAACAGCTCGCCAGGCGCTGGCGGAAAGCCGCCTGATTGTGGCGCGCGATTTAGCGCAGTGCGTG +>27 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAATTTTTGACATTCTGGATATTCCGCACCATGTCGAAGTGGTTTCTGCTCACCGTACCCCCGATAAACTGTTCAGCTTTGCCGAAAATGCTGAAGAAAACGGCTTTCAGGTAATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCAGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTTGGCGTTCCGGTACAAAGCGCTGCGCTAAGCGGTGTGGACAGTCTCTATTCTATTGTACAGATGCCGCGCGGTATTCCGGTTGGCACACTGGCCATCGGCAAAGCTGGCGCCGCTAACGCGGCGCTGCTGGCGGCGCAAATTCTGGCCACCCACGATAACGCACTGCATCAGCGCCTTCGCGAC +>28 +AGCGACTGGACTACCATGCAATTCGCCGCCGAAATTTTTGAAATTCTGGATGTTCCGCACCATGTAGAAGTGGTTTCCGCCCATCGAACCCCTGATAAACTGTTCAGCTTCGCCGAAACGGCGGAAGAGAACGGATATCACGTGATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACATTGGTGCCGGTACTCGGCGTTCCGGTACAAAGCGCAGCATTAAGCGGTGTGGATAGCCTTTACTCCATTGTTCAGATGCCGCGTGGCATTCCGGTGGGTACACTGGCTATCGGCAAAGCCGGGGCTGCGAACGCCGCGCTGCTGGCAGCGCAAATTTTGGCCACACACGATAATGCGCTGCACCAGCGCCTGAGCAAC +>29 +AGCGACTGGGCTACCATGCAGTTCGCCGCAGAAATCCTCGATATTCTGAACGTACCTCACCATGTTGAAGTGGTTTCCGCCCACCGCACGCCCGATAAACTGTTCAGCTTCGCCGAAGACGCCGAAAGCAACGGTTATCAGGTGATTATTGCCGGTGCCGGCGGCGCTGCGCACTTACCCGGAATGATTGCCGCCAAAACGCTGGTCCCGGTATTAGGTGTACCCGTCCAGAGCGCCGCATTAAGCGGTGTCGATAGCCTCTACTCCATCGTGCAGATGCCGCGCGGCATTCCGGTCGGTACGCTGGCGATCGGTAAAGCCGGTGCCGCTAACGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>30 
+AGCGACTGGGCTACCATGCAGTTCGCCGTCGAAATCTTCGAAATCCTGAATGTCCCGCACCACGTTGAAGTGGTTTCTGCTCACCGCACCCCCGATAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTATCAGGTGATTATTGCGGGCGCAGGCGGCGCAGCGCACCTGCCAGGCATGATTGCCGCCAAAACGCTGGTGCCGGTGCTGGGCGTGCCAGTACAGAGCGCCGCACTGAGCGGTGTCGATAGCCTCTACTCCATCGTACAAATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATTGGTAAAGCTGGCGCGGCAAACGCGGCATTACTGGCAGCACAAATTCTCGCGACTCACGATAAAGAGCTACACCAGCGTCTGAATGGC +>31 +AGCGACTGGGCTACCATGCAGTTTGCCGCCGAAATCTTCGATATCCTGAACGTTCCACACCACGTTGAAGTGGTTTCCGCACACCGCACCCCCGATAAGCTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAAGGGTTATCAGGTGATTATTGCCGGTGCTGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTGGGCGTGCCGGTGCAAAGCGCTGCGCTGAGCGGCGTGGACAGCCTCTACTCTATCGTCCAGATGCCGCGCGGCATTCCGGTCGGCACGCTGGCGATCGGCAAAGCGGGCGCGGCGAACGCGGCGTTACTGGCAGCGCAAATTCTGGCGACACACGATAAAGACCTGCGCCAACGTCTGGCGGAC +>32 +AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTCGAAATGCTGGACGTTCCGCACCATGTTGAAGTCGTCTCAGCCCACCGTACCCCTGATAAACTGTTCAGCTTCGCCGAAAGCGCTGAAGAAAACGGTTATCAGGTTATTATTGCGGGTGCTGGCGGTGCAGCGCATCTGCCGGGCATGATTGCAGCGAAAACGCTGGTCCCCGTGTTAGGCGTTCCGGTACAAAGCGCAGCGTTGAGCGGCGTAGATAGCCTCTACTCAATCGTGCAGATGCCACGCGGCATCCCCGTGGGTACGCTGGCGATTGGGAAAGCGGGTGCGGCAAATGCGGCCCTGCTGGCAGCACAAATTCTGGCAACACACGACAAAGCATTACATCAGCGTCTGAGCGAC +>33 +AGTGACTGGGCAACCATGTCTCATGCCGCAGATGTATTAGATACACTACAAATTCCTTACCATGTTGAGATTGTCTCTGCACACCGAACCCCTGATAAGTTATTTAGTTTTGCTGAAAAAGCAAAAAGTAATGGCTTTGATGTCATTATTGCTGGTGCAGGAGGAGCTGCCCATTTACCAGGAATGCTTGCAGCTAAAACGTTAGTACCCGTATTTGGTGTTCCTGTTCAAAGTGCGACATTAAGCGGTGTTGATAGCCTCTATTCAATCGTACAAATGCCAAAAGGTATCCCTGTAGGAACCTTAGCGATTGGTAAAGCAGGGGCTGCCAATGCGGCTTTATTAGCGGCTCAAGTTTTAGCGTTACATTCTCCTGCTATTTTAGATGCATTGACTGCA +>34 
+AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTTGAAATCCTGAATGTTCCGCACCACGTCGAAGTGGTTTCCGCACACCGTACCCCGGACAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTACGAGGTGATCATTGCCGGTGCGGGCGGCGCAGCACATCTGCCGGGCATGATTGCCGCCAAAACGCTGGTGCCGGTACTGGGTGTTCCCGTGCAAAGCGCCGCGTTAAGCGGGGTGGATAGCCTTTACTCTATTGTCCAGATGCCGCGCGGTATTCCTGTCGGTACCCTGGCGATTGGTAAAGCAGGTGCGGCAAATGCCGCCCTGCTGGCCGCGCAGATCCTGGCGACGCATGATAAAGATTTGCACCAGCGTCTGGCGGAG +>35 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAACGGCGGAAGAGAACGGATATCAAGTGATTATTGCCGGCGCGGGCGGCGCGGCGCACCTGCCGGGAATGATTGCGGCAAAAACGCTGGTCCCGGTACTCGGCGTGCCGGTACAAAGCGCTGCGCTAAGCGGCGTGGATAGCCTTTACTCCATTGTGCAGATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATCGGTAAAGCCGGTGCGGCTAATGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>36 +AGCGACTGGGCCACCATGCAGCATGCCGCTGAAATTCTTGATGCCCTTGATGTTCCTTACCATGTTGAAGTGGTTTCCGCTCACCGCACGCCTGATAAGCTTTTCAGCTTTGCTGAATCCGCGCAGCACAACGGTTATCAGGTGATTATTGCTGGCGCAGGCGGTGCGGCGCATCTGCCGGGCATGATCGCCGCGAAAACCCTGGTGCCGGTATTAGGCGTGCCGGTGCAAAGCGCGGCCCTGAGCGGCGTGGACAGCCTCTACTCTATCGTGCAAATGCCGCGCGGCATTCCGGTAGGGACGCTGGCGATCGGCAAAGCGGGTGCTGCAAACGCCGCACTGCTGGCGGCGCAGATCCTCGCCCAGCATGACGATGCGCTACTGGCGCGTCTGGCGGCA +>37 +AAACGCTTCCTGAACGAACTGACCGCCGCTGAAGGGCTGGAACGTTATCTGGGCGCCAAATTCCCGGGTGCGAAACGTTTCTCGCTCGAGGGGGGAGATGCGCTGATACCTATGCTGAAAGAGATGGTTCGCCATGCGGGTAACAGCGGCACTCGCGAAGTGGTGCTGGGGATGGCGCACCGCGGTCGTCTGAACGTGCTGATCAACGTACTGGGTAAAAAACCGCAGGATCTGTTCGACGAGTTTGCCGGTAAACATAAAGAACATCTGGGTACCGGCGACGTGAAGTATCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGTCTGGTTCACCTGGCGCTGGCGTTTAACCCATCGCATCTGGAAATTGTGAGCCCGGTGGTGATGGGCTCCGTGCGCGCCCGTCTGGACCGACTGGACGAACCGAGCAGTAATAAAGTGCTGCCGATCACTATTCACGGCGACGCCGCGGTGACCGGCCAGGGCGTGGTTCAG +>38 
+AAACGCTTCCTGAACGAACTGACCGCTGCAGAAGGGCTGGAACGTTATCTGGGGGCAAAATTCCCTGGCGCGAAACGTTTTTCGCTGGAAGGCGGCGATGCGTTAATTCCGATGCTCAAAGAGATGGTCCGCCATGCGGGCAACAGCGGCACCCGCGAAGTGGTGTTGGGAATGGCGCACCGTGGTCGCCTGAACGTACTGGTCAACGTGCTGGGTAAAAAACCTCAGGATCTGTTTGACGAGTTTGCCGGTAAACATAAAGAACATTTGGGCACCGGCGACGTGAAGTACCATATGGGTTTCTCGTCGGATATCGAAACCGAAGGCGGACTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTCAGCCCGGTAGTGATGGGGTCTGTGCGCGCACGTCTCGACCGGCTCGACGAACCGAGCAGCAACAAAGTGTTGCCAATCACCATTCATGGTGATGCAGCAGTTACCGGGCAGGGCGTGGTTCAG +>39 +AAACGCTTCTTAAGCGAACTGACCGCCGCTGAAGGCCTTGAACGTTACCTCGGCGCAAAATTCCCTGGCGCAAAACGCTTCTCGCTGGAAGGCGGTGACGCGTTAATCCCGATGCTTAAAGAGATGATCCGCCACGCTGGCAACAGCGGCACCCGCGAAGTGGTTCTCGGGATGGCGCACCGTGGTCGTCTGAACGTGCTGGTGAACGTGCTGGGTAAAAAACCGCAAGACTTGTTCGACGAGTTCGCCGGTAAACATAAAGAACACCTCGGCACGGGTGACGTGAAATACCACATGGGCTTCTCGTCTGACTTCCAGACCGATGGCGGCCTGGTGCACCTGGCGCTGGCGTTTAACCCGTCTCACCTTGAGATTGTAAGCCCGGTAGTTATCGGTTCTGTTCGTGCCCGTCTGGACAGACTTGATGAGCCGAGCAGCAACAAAGTGCTGCCAATCACCATCCACGGTGACGCCGCAGTGACCGGGCAGGGTGTGGTTCAG +>40 +AAACGCTTCCTCAGCGAACTGACTGCAGCGGAAGGTCTGGAACGCTACCTGGGCGCGAAATTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGTGATGCGTTAATCCCAATGCTCAAAGAGATGATCCGCCACGCCGGTAACAGCGGTACCCGTGAAGTGGTACTGGGTATGGCGCACCGTGGTCGTCTGAACGTCCTGGTTAACGTGCTGGGTAAAAAGCCGCAGGATCTATTCGACGAATTTGCGGGCAAACATAAAGAACACCTCGGTACCGGTGACGTGAAGTACCACATGGGCTTCTCATCGGATATCGAAACCGAAGGCGGTCTGGTGCATCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTTATCGGTTCCGTACGTGCACGCTTGGATCGTCTGGACGAGCCGAGCAGCAATAAAGTGCTGCCAATCACTATTCATGGTGATGCGGCAGTAACCGGGCAAGGCGTGGTTCAG +>41 
+CGTACTTTCCTTGAAGAGCTGACTGCCGCTGAAGGTTTAGAGCGCTATCTTGGTGCGAAATTCCCTGGTGCTAAACGTTTCTCTCTCGAAGGGGGGGATGCCTTAGTTCCGATGACCAAAGAGATGATCCGTCACGCGGGTGCCAGTGGCATGCGTGAAGTGGTGATTGGGATGGCGCACCGCGGTCGCTTGAACATGCTGGTCAACGTTCTGGGTAAAAAACCGCAAGATCTGTTTGATGAGTTTGCCGGTAAACATGGCGAAGGCTGGGGCACAGGTGATGTGAAATATCACCAAGGTTTCTCCGCTGACTTTGCGACACCGGGCGGTGATGTTCACTTAGCACTGGCTTTCAACCCATCGCATCTTGAGATTGTGAACCCTGTTGTGATGGGTTCAGTTCGCGCGCGTCAAGACCGCCTAGGTGATGAAGATGGCAGTAAAGTGCTACCTATCACTATCCATGGTGACTCTGCGATTGCCGGACAAGGTGTGGTGGCT +>42 +AAACGCTTCCTGAGCGAGCTGACCGCAGCCGAAGGCCTTGAGCGCTACCTGGGCGCGAAGTTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGCGACGCGCTGATCCCGATGCTGAAAGAGATGATTCGCCACGCGGGCAACAGCGGCACGCGTGAAGTGGTGCTGGGTATGGCGCACCGCGGTCGTCTTAACGTGCTGGTTAACGTGCTGGGTAAAAAACCGCAGGACCTGTTCGACGAGTTCGCGGGCAAACACAAAGAACACCTTGGCACCGGCGACGTGAAGTACCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGCCTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTAATTGGTTCGGTACGTGCCCGTCTGGATCGGCTGGACGAGCCGAGCAGCAACAAAGTACTGCCGATCACCATTCACGGCGACGCCGCGGTGACCGGTCAGGGCGTGGTTCAG +>43 +GTGCTGGGCCGTAATGGTTCCGACTATTCCGCCGCCGTGCTGGCCGCCTGTTTACGCGCTGACTGCTGTGAAATCTGGACTGACGTCGATGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGACTGCTGAAATCGATGTCCTACCAGGAAGCGATGGAACTCTCTTACTTCGGCGCCAAAGTCCTTCACCCTCGCACCATAACGCCTATCGCCCAGTTCCAGATCCCCTGTCTGATTAAAAATACCGGTAATCCGCAGGCGCCAGGAACGCTGATCGGCGCGTCCAGCGACGATGATAATCTGCCGGTTAAAGGGATCTCTAACCTTAACAACATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGGATGATTGGGATGGCGGCGCGTGTTTTCGCCGCCATGTCTCGCGCCGGGATCTCGGTGGTGCTCATTACCCAGTCCTCCTCTGAGTACAGCATCAGCTTCTGTGTGCCGCAGAGTGACTGC +>44 
+GTGCTGGGGCGTAACGGTTCCGACTATTCCGCTGCGGTACTGGCCGCCTGTTTACGCGCCGACTGTTGCGAAATCTGGACGGACGTTGACGGTGTGTATACCTGCGACCCGCGCCAGGTGCCGGATGCCAGACTGCTGAAGTCAATGTCCTATCAGGAAGCGATGGAACTTTCCTACTTCGGCGCCAAAGTGCTTCACCCGCGTACCATTACTCCCATCGCTCAATTCCAGATCCCATGTCTGATAAAAAATACCGGTAATCCGCAAGCGCCGGGCACGCTGATTGGCGCCAACAGCGATGAAGACGGGCTACCGGTAAAAGGCATCTCGAACCTCAATAATATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGCATGGTCGGGATGGCGGCGCGCGTGTTCGCCACCATGTCGCGTGCCGGGATTTCGGTAGTGCTGATCACCCAATCCTCTTCGGAGTACAGCATCAGCTTCTGCGTGCCGCCAAAGCGATGC +>45 +GTGCTGGGCCGTAACGGCTCCGATTATTCCGCCGCCGTACTGGCCGCCTGTTTACGCGCTGACTGTTGTGAAATCTGGACTGACGTCGACGGCGTGTATACCTGCGACCCGCGTCAGGTGCCAGACGCCAGGCTGCTGAAGTCGATGTCTTATCAGGAAGCAATGGAGCTTTCTTACTTCGGCGCTAAAGTACTACATCCGCGCACTATTACTCCTATTGCCCAGTTCCAGATCCCTTGTCTGATTAAAAATACCGGCAATCCACAAGCGCCCGGTACGCTGATCGGCGCTGCCAGCGACGATGATGCTCTGCCGGTTAAAGGGATTTCTCACCTTAACAACATGGCGATGTTTAGTGTCTCCGGTCCGGGGATGAAAGGCATGGTGGGTATGGCGGCGCGCGTTTTTGCCGCTATGTCACGTGCGGGAATCTCGGTGGTGTTGATCACGCAATCTTCATCTGAATACAGCATCAGCTTCTGCGTGCCGCAGAGCGACTGC +>46 +GTGCTGGGCCGCAACGGTTCTGATTACTCCGCTGCGGTGTTGGCTGCCTGCTTACGCGCCGACTGTTGTGAGATCTGGACTGACGTTGACGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGGTTGCTGAAGTCGATGTCCTATCAGGAGGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTCCTTCATCCTCGCACCATCACCCCCATTGCCCAGTTCCAAATCCCATGCCTGATTAAAAACACCGGAAACCCGCAGGCCCCTGGTACGCTGATCGGCGCCAGCGTGGATGAAGACGAACTGCCGGTGAAAGGGATCTCGAACCTGAACAATATGGCGATGTTCAGCGTTTCCGGCCCAGGAATGAAAGGGATGATCGGGATGGCGGCGCGCGTCTTCGCGGCAATGTCCCGCGCGGGGATCTCCGTGGTGCTGATCACGCAATCCTCTTCTGAATACAGCATCAGTTTCTGCGTACCGCAGGGCGACTGC +>47 
+GTGTTGGGGCGCAATGGCTCTGACTACTCTGCCGCTGTGCTGGCTGCCTGTTTACGCGCGGACTGTTGTGAGATCTGGACCGATGTCGACGGCGTATATACCTGCGATCCGCGCCAGGTACCCGATGCCCGACTGCTGAAGTCGATGTCTTATCAGGAAGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTTCTGCATCCGCGCACCATTACCCCAATTGCCCAGTTCCAGATCCCGTGCCTGATTAAAAATACCGGCAATCCACAAGCGCCTGGCACGTTGATCGGCGCCAGCAGTGATGAAGACGATTTGCCGGTAAAAGGTATTTCTAACCTCAATAACATGGCGATGTTTAGCGTCTCCGGCCCTGGAATGAAAGGCATGGTAGGCATGGCGGCGCGCGTTTTTGCCGCGATGTCGCGTGCGGGCATCTCGGTGGTGCTGATCACGCAGTCTTCTTCTGAATACAGCATCAGCTTCTGCGTTCCGCAGGGCGACTGC +>48 +GTATTAGGTCGCAATGGTTCAGACTACTCAGCTGCAGTATTAGCAGCCTGTTTACGTGCTAAATGCTGTGAAATTTGGACTGATGTTGACGGTGTTTATACTTGTGATCCACGTTTAGTGCCTGATGCACGTTTGTTAAAAGGCATGTCATATCAAGAGGCAATGGAACTGTCTTACTTTGGTGCCAAGGTACTTCATCCTCGTACAATTGCGCCTATTGCCCAATTCCAAATACCTTGTTTAATTAAAAATACGGGCAATCCAGATGCGCCGGGTACCTTGATTGGTGATGGTCAAAAAGATGAGAGCACACCTGTTAAAGGAATAACTAACCTTAATAATATGGCAATGATCAACGTATCTGGGCCTGGAATGAAAGGAATGGTAGGAATGGCGGCTCGCGTGTTCTCGGTAATGTCGAGAGCGGGGATTTCAGTTGTTCTAATCACACAGTCTTCTTCTGAATACAGCATTAGTTTTTGTGTGCCACAAAAAGAGCTG +>49 +GTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGCTGGCTGCCTGTTTACGCGCCGATTGTTGCGAGATTTGGACAGACGTTGACGGGGTCTATACCTGCGACCCGCGTCAGGTGCCCGATGCGAGGTTGTTGAAGTCGATGTCCTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCCCGCACCATTACCCCCATCGCCCAGTTCCAGATCCCTTGCCTGATTAAAAATACCGGAAATCCTCAAGCACCAGGTACGCTCATTGGTGCCAGCCGTGATGAAGACGAATTACCGGTCAAGGGCATTTCCAATCTGAATAACATGGCAATGTTCAGCGTTTCCGGCCCGGGGATGAAAGGAATGGTTGGCATGGCGGCGCGCGTCTTTGCAGCGATGTCACGCGCCCGTATTTCCGTGGTGCTGATTACGCAATCATCTTCCGAATACAGTATCAGTTTCTGCGTTCCACAAAGCGACTGT +>50 
+GTGCTCGGGCGCAACGGCTCCGATTATTCCGCAGCGGTACTGGCAGCGTGTTTACGCGCCGATTGTTGCGAGATCTGGACTGATGTCGATGGTGTCTATACCTGCGACCCACGTCAGGTACCGGATGCCCGATTACTTAAGTCGATGTCGTACCAGGAGGCTATGGAACTCTCCTATTTCGGCGCCAAAGTCCTCCATCCTCGAACCATCACTCCCATCGCCCAGTTCCAGATTCCCTGCCTGATAAAAAATACCGGAAACCCGCAAGCACCAGGAACGCTGATTGGCGCCAGCCGCGACGAAGATGATCTGCCGGTGAAGGGCATTTCAAATCTCAATAATATGGCGATGTTCAGCGTCTCCGGGCCGGGGATGAAGGGAATGGTCGGCATGGCTGCTCGCGTGTTTGCGGCAATGTCTCGCTCAGGAATTTCGGTAGTCCTGATTACGCAATCCTCCTCTGAGTACAGCATTAGCTTCTGTGTACCGCAGGCTGACTGT +>51 +GTGCTGGGGCGTAACGGCTCTGACTACTCCGCCGCCGTGCTGGCGGCCTGCTTACGCGCGGACTGCTGTGAGATCTGGACTGACGTCGACGGCGTTTATACCTGCGATCCGCGCCAGGTACCGGACGCCAGGCTGCTGAAGTCGATGTCGTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCGCGTACCATCTCCCCGATTGCCCAGTTCCAAATCCCTTGCCTGATTAAGAATACCGGTAACCCTCAGGCGCCGGGCACGCTGATTGGCGCCAGCGCGGATGAAGATGAACTGCCGGTGAAAGGCATTTCTAACCTCAATAACATGGCGATGTTCAGCGTCTCCGGCCCGGGGATGAAGGGCATGGTCGGCATGGCGGCACGCGTATTTGCCGCTATGTCCCGCAACGGGATCTCCGTGGTGCTGATCACGCAGTCTTCTTCCGAATACAGCATCAGCTTCTGCGTTCCGCAGGGTGATTGC +>52 +GTATTAGGCCGTAACGGTTCCGACTACTCCGCCGCCGTGCTGGCCGCGTGTTTGCGCGCCGACTGTTGTGAGATCTGGACTGACGTCGACGGCGTCTATACCTGCGACCCGCGCCAGGTGCCGGACGCCAGGCTGCTGAAGTCGATGTCGTATCAGGAAGCCATGGAACTCTCCTACTTCGGCGCTAAAGTTCTCCACCCCCGCACCATTGCCCCCATCGCCCAGTTCCAAATCCCCTGTCTGATCAAAAACACTGGTAACCCGCAAGCGCCAGGCACCCTGATCGGTGCCAGCAGCGATGAAGACGGCCTGCCGGTGAAGGGCATCAGTAACCTGAATAATATGGCGATGTTCAGCGTCTCTGGTCCGGGCATGAAAGGCATGGTGGGAATGGCGGCGCGCGTGTTCGCGGCGATGTCCCGTGCGGGCATCTCGGTGGTGCTGATCACCCAATCGTCTTCTGAATACAGCATCAGCTTCTGCGTGCCGCAGGCCGACAGC diff --git a/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.ndb b/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.ndb new file mode 100644 index 0000000000000000000000000000000000000000..dfa7d2e267e27fdcac41d817c56044823912df6b GIT binary patch literal 20480 zcmeI%u}*_f6adgGnn+wQ#(ywwF7Ez{QB0atSk$?{)y4En{D3a@50nD0P>05(Iy5;9 zy!&{M%k5#hB_bVp8+{VEZl~uF`CS|BJbm@Y&P}7mw14ZFqjezx0t5&U 
zAV7cs0RjXF5FoI>z|eR9NB=)$AEK`2tS>!{C;j~AzWslp33bORK$56a26G33lxW~VPj%q;tg;sy%comRr(uEevFOhThS)()6H3s z6>seN|G!V$0yw3B1Wqd;f-?%p;H&~dIH!OV&MP2> z3kt~Lq5^`rq<|zYBhZn=6$K=5RRKv{Q$P~e6_CUY1tf7(0ZD8qAckiNjy+M5)T!S#HIq0c%*}tG zZ^sMi6f4nz(xlPXYgh@Z&oHb@XD#FU)&IfI*Y>wVT&>YxPr&nmV7~6!xPjbe;7A0gSS#c1h+7YcT!^%w=s_o zQs4wev51dSQy0duf=|-XQ@Dd?*pZH%#)SS9@{s-)!w2|D{!^f4PZ_)9Tf|rLTNJwk#BTooH4p9;?y@ZRlXt;Imt1zmfvc{$?uMIgx$TZacinU3 wz6Tz9Z*opT}eelsIpMCMwH{bp6(=WgMapr&Q3tcn?%>V!Z literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nsq b/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nsq new file mode 100644 index 0000000000000000000000000000000000000000..7fc5e3b8e6fafd8419a1bdda10966df4ce2396be GIT binary patch literal 6297 zcmZ9Mk3W-p|Hn6KrJ`R5o!jXgnzNjv&Ns@A(o}>_DwShXLu_kiQ(xyW9GXe`!NxHA zk)0e|rLs<^qAApUQm1oQ;>2k>gtg6b7dlbPee``_*Vg%-$Ndjn&pn^7_h&fU?yf3w z)y>@g_lZg&z0+@j-e8eXQ193s( zCt37>1RIlK%0=ZTQ{iTk#0hMf?Y1WEO+e(QDM92EsP-xy!p*!m9%CkAXH}HyS zDnQM^k(N%2$!BfaKI=wncT_Do{_gI*p>sGHGN5x(vKc%)LRK0R+F#`;{hW>R`#{;VsepQ=fwXjqR%F{|vn;5!`(c&qz}?-0p>s?bvI9KDzLYIY zjBWrrxM%LeO(^@}j`)3MXmc}(7TY2#76IH1`;o{kwn#>E-ZN}=i{q!fggrAM#JEh<0j{B0I+gw}{cx7nn>c1CW_g;XD z7TIpXEyII*Eji)l?l{;zL-q$#k;KYoFkyEkBB>J><5s3c3#Lj3*D5lgyn11GCX}z2 zG<;cg_rwF8hVG7_ba@}p#h5$LiF!sIxC4EN zQHaskxbkKxPXZl26=bl`gWYL2zVN7*D{meAS{kvGRCa(zV+vlsxu;jqY0H>L%qg3P zNJZ5uO|vOknyr0)DLVl^kLcd6dLpT>qqVSrZth^ZcYgrBIRF1%O?UIE04OnSn1nI zhcJ2wxc`klU3ePkJd|F6fIGdXGe&=BD+hH~Tg$TxE#+<1B}zl)D{08|WwQ8`NG1i{ zy{V5rizzVk!iKz$!tSX1J1CzKhQRJ=7WXl5KkQHJ!*;(9^yU*|*zVJXSC(Xy03CL( z0s6ML51#1lfd04L*@yhF-EE8D>iAzvLtN`*+t1TNuD^Z*?kd_WmO$$RyXThz9d+;O z4KY&<1zovVdAPd2=pmbXBT%}f{3D=$oMCZqL|U68qTeQT=Vmwu-KPv%iw!@9}5YY!4NxOSIe$xI?yRovV3!jW_SQsllP#(6je>&y{Z-=uku zb{ezgpnk(f)fP{WWQX$jw%hI6tyLw#C8iT!#@JOY5vMyzicZ`A4MyRy3dfo@APs~2 z-&Q*fAWY5RrGqFeL2keGJ*-QncHyL4i{uDKBsf_LYSwCw?eJtY#OgzK{v+(jaGZOE zk|A%`it9^4I(zfGD*^Wivt`r9MDiua 
z66B`&TI9yzoXKFKOk}oEutE;G^j(FH(=0UiAkXhJ-77THU2ki*)LA>;sFt;}{Gj9C z)o^=R?PF(x5Trlr7m8EI5~jTWQmnP(Ru_|ECPG$~^eT8R2_u&x*#cuRPO!;T=+c*N z{;+r2)2mw<5s589o@e&EmvxMnwrksKFSJ!^=^LTr3Ng!X-g}sH-p$$QXq(#2iW`Wu z|4mP)>wqs$6Mqfsn5Yp#N5xDpvP#mcNS+sy@&j}P825=pC#1*Au8&Am1$pYP zsVYKtQQJ|o5V`5wFR4dQJ{n1Jy7nFh7cxDNX2WD{H7kPD(zuLgaju3qru?HTDKCzn z=wxXGoBYAd6U@pL?Q>Y_OXz6xaDhqWQ1A0umD24ySR&g!_u0i+O}ud1gM=QooFtZ} z*G@?GcDD(dR?M9$XA@%o?QFN6>m3%>7>}^;73!DPar-sHMbZWA;^68D?dyEwH{tw} z+I{2UIy#?Gty-v0^hS17O$2~%UfF+F_W z{Mvo5!+)jocU4O-8bTK*9*c=%EGHgsQ%dj%m$Zf$8N+{k*yh8fC_Ow2qjzmjWr%G1 z?q^8S70KZNt%SFAC3fvnR?U>8zgsWB=p2XG7tVGRuJ`e<#wvumrBLr#kGhM&y^K*k zVNNf&ofaNFzczY&J?1XGSiAJBQMh>t!|SPB@2bKx*`#Zjdob%O!v-ZjFbSn+RXS4> z^dgVgTWoND5x%$;+)Kcng-l&og3;|KYP|`u;Ev;Z9|gLaT~w$~tmU2rx(dnz-Sk?F zmIwFraNU3S43=so!c{I6X2mhQYI5~56`sK$xf3F({Ko}Y`P@L-WT>|~z4Ev12@H`3 z{T5q@mQP5vdB%a=`@TXZazA}I{?CSa_EY6`4!e2IBq3J53So<({9o=TV0Tk`z7gC@ zYWZmx-9x&t2Cu9;95cxEI+d?iDA4jJx`94f%UnIMxin}M&EhUa-EH^Wq86t&@oZ+d z5}uhX?wKXu|2~duSYba^TSwRp<=12M0fcQ|sAp7h<1P;0L$8ieJ*E97-}oA(hXNh0 zuIhxB{+O~>7}L-2VjO)=mCk2!NtNA^F{6)(Hmi}%%G_1f@>cqNwsKN&F3dYM{29BF zv_%2sOZGaqQJUsK`8o%;tnL7Q+t>* z$eqDJZyxW(AgL5K;EpiSo6jY>pK^SIVbi$jg6zfUe8GdXKHrun8VHFhTHex}@r z$Zh+|b|w%um4;Pl3yAIoR)%e#2pB*6v{HA(?a~2bBATF>nOp74d(V(!j6e3&AJksAqmfo%@vF(+8onYfNKddqD6vk+k zM%z!Uq!Y&HqYUbNVA!D^zz|wIROlX-R%c+}X{DdL#Q z+t8{{wlTqGkvymodG7%XTh(FM@{qpk?~k5Dv+MinE46{!y% z&m&X3{jxq;bO%~3Q*ASR*G{aQ{U7Dc*Ywo?h_)7#TK9;ypMpKcOG4mwsq>R}RbiWB zd$_25%^Zayu>|!%H-}d0aUMm^995{{u700{R-Cn294B-SZv{nPZmgAi&}E4_n{_2& zjmU9~0q?_drasm)U1Uvv8O~I1c?vx2Of4q_S#+*ok2IbSHmQ`OJGHA%1`b9w>FT-- z4DL{Lri-)Z57#yNq7PoN4^!37Wpz!jaxHXKC`PZtjb}3PtrMBE*rJrV(MHOVVQNHD z5V3?R9Bw&jD&aNI3AgJBMxb{VjSN`mTb~63y=~>$*WBt=_&B7%LXXleI=QMd>POv~ zK0_k+5iJeq#gwahd&gJ9qOj%8H5U4}MAV&>Ll{ew13eM{U0UWW&fygMXyf%GI%-I! 
zh29J9Jk(u{(TfYfU5mO0_k^ey3))ckf$@(LkP0>C9@V>OFz}yIKmNh#o6vKILUa73 zK-U2MA?GmF$GPS#=DvqGqd>CHJCkrAQ!W@!Vp`~;6#H%94s_ZL;)NLDu=_a+{kQWd zeW|3VJwDG|htb>A;12XHn0upo3tAp?e;L*E2dDeiQ`XSqOc$U-dHWB*o&BkE4GHMU z2V;d^1ya8EOJZt{Mlhaf#J8rW%{n8BT^>CE^q7z&y-%r9I4s6?&zWYdC%lI8#YH1J zwEP!wr_O9t2MF#tdcUR)OvO7RS4KA7asTcC>^>OS|5c0bZ07`%JCpjqSNcVg>zcNE zA3Pi`$Ph~pc{iDE!0tq^d`UDZhY0S%_wMY&tfFYRI-NEI-5uyEaA!6=As8|D>5_c3 zd>ww?lHj-4?tfif#ou-Z_n(G)ZbA7T%$;&UU*u@Hx=ZNpnrAT%&GGG2hQ`Dgxy33_ z^{=iPX!wG6mp(u(Q`1~SGx}Er?2Z&BoJ{{V8`<_*wv*QJM#ZX~&7oac{83xyAG*UC zpSbGS%E)-piay%tveo-zr!7PRUUIw3Py5dj1y&>StA9#&G%{&LJ;Rl2c3%FCf5Ede z`-NWO;~u1WrX-+r!v&PiC}@dWT~*cM_Tnx*9;K6kekovgC(sY>2D(qSlhSdlYt`3l zL%Ra_*HQXJ-RbvaA4)gq z%j`rzk0DehfICKK6|`XPZr~nIy`+wG4bSLr2>7;BxZ(SQ;J*LU1Ag)k#(oP#-6v7^ z;)m-Q9B@~GI~jFf_qE0SdH3Zr{<9*QE&~0XH1!fM$-ZZ}#W!*Ful&z3x`c?jQwhzw z+f>6ZU*}9&SMD}9m@M@9Xoabozg|-kK&yLQQ^CWfnKJp7P`38 z`j@SaqfZ0(&Xn%*PoVrN)ID3V;W9I+sC!uDo9OohrT=MYlOXOvSD(opDhF<7urwz9 z$So#DPiSyUzDrM_mYs_tM_b*k^yL|Ta>ucqn7ih^yJ<1dB|4$Y6z#hAy8V0RSa%-+ z_a93!_k;#s(v}{S-iXn&4dW>N;xAf<=8tZNJ-v8m>Budkt*U=rm9E8Y4rs@aH5Yl(nmj8 zu|Ias${PO%$$hmzcla?6sZ=)i{M7JqlDKp7ci)W|{pGtC`gt~9xth1GaCNsc446AZ z+_HCF)kiIE^_JbE$j2tp@^JItqgS_odA1YPacmcM^Jdgt@Z0)eZ`WUk%cS7GhW4Ot z-L}3t3v00Qu)9MG>V8m?aETe!4&~P*`aX&N{8K1DK9A^rL(?P2u8w%YM1-3M`sylO z!xztizWvfU8aaAwv>_ll@>uH0^xc2Li~Gj!kz+>}N*~WaZ(b0!J{Vq{sZ1I#^zWnf z*R7-UqBq~k?XErJzs#Yb^i-v?Rk5Lw6+XA;U-UJJe&~yPnf;pcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nto b/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nto new file mode 100644 index 0000000000000000000000000000000000000000..ad19396e81aff427697a109c3c035ac73cb27f3f GIT binary patch 
literal 216 zcmXBFg${xM06;;oyHK$M6YTo`U-LHJc6+}dFSG(hN|dQkrAD0wO0 +KFRPGHADYTYHQKYGVRDYRGGGRSSARETAMRVAAGAIAKKYLQQEFGIEVRAYLSQMGDVAIDKVDWNEIENNDFFCPDVDKVAAFDELIRELKKEGDSIGAKIQVVATGVPVGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVRQKGSQHRDPLTPQG +>1 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKHGIVIQGCLTQMGDIPLEIKDWQQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVANGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>2 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGVEIRGCLTQMGDIPLEIKDWSQVELNPFFCPDPDKIEVLDELMRGLKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>3 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLKEKLGIEVRGYLSQLGPITCDLVDWSIVESNPFFCPDPSRLDALDEYMRALKKEGNSIGAKVTVVAQGVPAGFGEPVFDRLDADLAHALMSINAVKGIEIGDGFGVVTLKGTENRDEITKKG +>4 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGIEIRGCLTQMGDIPLEIKDWSQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKDG +>5 +VFRPGHADYTYEQKYGFRDYRGGGRSSARETAMRVAAGAIAKKYLQQKFGIVIRGCLSQMGDIPLAIKDWDQVELNPFFCADADKLDALDELMRGLKKEGDSIGAKVTVVADGVPAGWGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVKLRGSQNRDEITKAG +>6 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAQKFGVVIRGCLTQMGDIPLEIKDWDQVEQNPFFCPDPDKIEALDELMRALKKEGDSIGAKVTVVADSVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFGVVQLRGSQNRDEITTAG +>7 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>8 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>9 +MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>10 +MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQPLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>11 
+MEMVARVALVQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGERMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGQSLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>12 +MEMIARVTLTQPHDAGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGDSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>13 +MEMIARVALSLPHQAGATTVPARKFFDICRGLPEGAEIAVTLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCAMPVGQPLPNHSVIVPRKGVLELMRMLDGGDSPLRIQ +>14 +SALTENDLVFALSQHAVTFADAELQQQGKSWPSLPRYFAIGRTTALALHTVSGFNIHYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELIGETLTARGADVDFCECYQRSAKYYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>15 +AALGESDLLFALSQHAVAFAQSQLHQQDRKWPRLPTYFAIGRTTALALHTVSGQKILYPQDREISEVLLQLPELQNIAGKRALILRGNGGRELIGDTLTARGAEVTFCECYQRCAIHYDGAEEAMRWQSREVTTVVVTSGEMLQ +>16 +ATLTENDLVFALSQHAVAFAHAQLQRDGRNWPASPRYFAIGRTTALALHTVSGFDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRGRELLGETLTARGAEVSFCECYQRSAKHYDGAEEAMRWHTRGVTTLVVTSGEMLQ +>17 +AALTDNDLVFALSQHAVAFAHAQLQQQELDWPVQPRYFAIGRTTALALHTVNGCDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELLGKTLTERGAEVTFCECYQRSAKHYDGAEEAMRWHSRGVTTIVVTSGEMLQ +>18 +ETLGDNDLLFALSQHAVSFAHAQLQQQGLNWPSLPHYFAIGRTTALALHTVSGHKIRYPQDREISEVLLQLPELQSIAGKRALILRGNGGRELIGQTLTSRGADVTFCECYQRSAKHYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>19 +RLLQEGDLLFALSQHAVEFAHAQLQQHAVSWPHAPRYFAIGRTTALALHTASGIDVRYPLDREISEVLLQLPELQTIAGKRALILRGNGGRELLGETLRERGADVTFVECYQRCAKHYDGAEEAMRWHARGINTLVVTSGEMLQ +>20 +IAGCQKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADIARKVAEAVERQLAELPRAGTARQALSASRLIVTKDLAQCV +>21 +IAGCKKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGIAQNVAEAVERQLAELPRAETARQALSASRLIVTKDLAQCV +>22 +IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGMASRVAEAVERQLAALPRAETARQALSASRLIVTRSLAQCV +>23 
+IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGTESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPAADMARRVAEAVERQLAELPRAETARQALNASRLIVTKDLAQCV +>24 +IAGCQKVVLCSPPPIADEILYAAKLCGVQAIYKVGGAQAISALAFGTVSIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADMAKRVGDAVERQLADLPRAETARQALSASRLIVARDLDQCI +>25 +IAGCKKVVLCSPPPIADEILYAAQLCGVKEVFNVGGAQAIAALALGTESIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAKLAEGVAEAVERQLAELSRADTARQALSASRLIVAKDLAQCV +>26 +IAGCKKVVLCSPPPIADEILYAARLCGVQQVYQVGGAQAIAALAFGTETVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATTDFVASDLLSQAEHGPDSQVILLTPDSAMAQAVADAVERQLAELPRAETARQALAESRLIVARDLAQCV +>27 +SDWATMQFAAEIFDILDIPHHVEVVSAHRTPDKLFSFAENAEENGFQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLRD +>28 +SDWTTMQFAAEIFEILDVPHHVEVVSAHRTPDKLFSFAETAEENGYHVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLSN +>29 +SDWATMQFAAEILDILNVPHHVEVVSAHRTPDKLFSFAEDAESNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>30 +SDWATMQFAVEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKELHQRLNG +>31 +SDWATMQFAAEIFDILNVPHHVEVVSAHRTPDKLFSFAESAEEKGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLRQRLAD +>32 +SDWATMQFAAEIFEMLDVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKALHQRLSD +>33 +SDWATMSHAADVLDTLQIPYHVEIVSAHRTPDKLFSFAEKAKSNGFDVIIAGAGGAAHLPGMLAAKTLVPVFGVPVQSATLSGVDSLYSIVQMPKGIPVGTLAIGKAGAANAALLAAQVLALHSPAILDALTA +>34 +SDWATMQFAAEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYEVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLHQRLAE +>35 +SDWATMQFAAETAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>36 
+SDWATMQHAAEILDALDVPYHVEVVSAHRTPDKLFSFAESAQHNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDDALLARLAA +>37 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLINVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>38 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>39 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDFQTDGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>40 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>41 +RTFLEELTAAEGLERYLGAKFPGAKRFSLEGGDALVPMTKEMIRHAGASGMREVVIGMAHRGRLNMLVNVLGKKPQDLFDEFAGKHGEGWGTGDVKYHQGFSADFATPGGDVHLALAFNPSHLEIVNPVVMGSVRARQDRLGDEDGSKVLPITIHGDSAIAGQGVVA +>42 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>43 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDDDNLPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>44 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGANSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFATMSRAGISVVLITQSSSEYSISFCVPPKRC +>45 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGAASDDDALPVKGISHLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>46 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASVDEDELPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>47 
+VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>48 +VLGRNGSDYSAAVLAACLRAKCCEIWTDVDGVYTCDPRLVPDARLLKGMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPDAPGTLIGDGQKDESTPVKGITNLNNMAMINVSGPGMKGMVGMAARVFSVMSRAGISVVLITQSSSEYSISFCVPQKEL +>49 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDC +>50 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRSGISVVLITQSSSEYSISFCVPQADC +>51 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTISPIAQFQIPCLIKNTGNPQAPGTLIGASADEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRNGISVVLITQSSSEYSISFCVPQGDC +>52 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQADS diff --git a/locidex/example/manifest_in/passes/pass_one_db/config.json b/locidex/example/manifest_in/passes/pass_one_db/config.json new file mode 100644 index 0000000..dd121fd --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_one_db/config.json @@ -0,0 +1,12 @@ +{ + "db_name": "Locidex Database", + "db_version": "1.0.0", + "db_date": "04/04/2024", + "db_author": "test1", + "db_desc": "test1", + "db_num_seqs": 53, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" +} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/pass_one_db/meta.json b/locidex/example/manifest_in/passes/pass_one_db/meta.json new file mode 100644 index 0000000..f3b88fa --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_one_db/meta.json @@ -0,0 +1,1181 @@ +{ + "info": { + "num_seqs": 53, + "is_cds": "True", + "trans_table": 11, + "dna_min_len": 220, + "dna_max_len": 350, + 
"dna_min_ident": 80, + "aa_min_len": 73, + "aa_max_len": 116, + "aa_min_ident": 80 + }, + "meta": { + "0": { + "seq_id": 0, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 609, + "dna_seq_len": 501, + "dna_seq_hash": "4811bc98591c74954ace3cb487330482", + "aa_seq_len": 167, + "aa_seq_hash": "a8fbcf8179d8548f980b7b15f29de1d4", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "1": { + "seq_id": 1, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 614, + "dna_seq_len": 501, + "dna_seq_hash": "b66979eaf680fab872ffe1bde4c092d6", + "aa_seq_len": 167, + "aa_seq_hash": "3e034a4d80ac27352822774abd9319df", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "2": { + "seq_id": 2, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 618, + "dna_seq_len": 501, + "dna_seq_hash": "f02a36ff6df05f9bf38428fa22a035da", + "aa_seq_len": 167, + "aa_seq_hash": "e2d30bb18231528ef65c34880704dd7a", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "3": { + "seq_id": 3, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 619, + "dna_seq_len": 501, + "dna_seq_hash": 
"bee9d7360aa8e9b840fb29afa1de2c2e", + "aa_seq_len": 167, + "aa_seq_hash": "c3f71f5780b5f1031aaf21697a482ee3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "4": { + "seq_id": 4, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 620, + "dna_seq_len": 501, + "dna_seq_hash": "5b7956485455fdbc7c86d4834a8f7406", + "aa_seq_len": 167, + "aa_seq_hash": "60ce8f3b07f53378580ee528910ee623", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "5": { + "seq_id": 5, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 624, + "dna_seq_len": 501, + "dna_seq_hash": "98ba14aac74444a253123aff3d20c69f", + "aa_seq_len": 167, + "aa_seq_hash": "bab41702c7c209def93f9c9930c27086", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "6": { + "seq_id": 6, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 716, + "dna_seq_len": 501, + "dna_seq_hash": "6b9166d5d996897cae3cc288d7969d78", + "aa_seq_len": 167, + "aa_seq_hash": "5bc86c0a9226224922cbd6219c182622", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + 
"locus_type": "mlst" + }, + "7": { + "seq_id": 7, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "d401763f2df6e5fe87e1e07d3c170fe6", + "aa_seq_len": 167, + "aa_seq_hash": "928ad814483bbffda3e3b3a0aa4ca072", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "8": { + "seq_id": 8, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 120, + "dna_seq_len": 501, + "dna_seq_hash": "9c50d73cc4ef8d0a447f07ad150ad8cc", + "aa_seq_len": 167, + "aa_seq_hash": "928ad814483bbffda3e3b3a0aa4ca072", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "9": { + "seq_id": 9, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 555, + "dna_seq_len": 501, + "dna_seq_hash": "fab4f658dfba0cd0174a4a87998cf948", + "aa_seq_len": 167, + "aa_seq_hash": "a081905e659429db1f40e145932ae277", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "10": { + "seq_id": 10, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 557, + "dna_seq_len": 501, + "dna_seq_hash": "acb2ed027124e2a54b7734cd538590f1", + 
"aa_seq_len": 167, + "aa_seq_hash": "970184ec5ccc9f02ee3c858d2687cc18", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "11": { + "seq_id": 11, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 558, + "dna_seq_len": 501, + "dna_seq_hash": "ad996a122298d55ab3d4b2ea7a4974b0", + "aa_seq_len": 167, + "aa_seq_hash": "945455021fffea9b793d16af630db961", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "12": { + "seq_id": 12, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 563, + "dna_seq_len": 501, + "dna_seq_hash": "815242e67f31f4e2968f7f0620565125", + "aa_seq_len": 167, + "aa_seq_hash": "1b117ca76a022ae63d6f7bfe2ead289e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "13": { + "seq_id": 13, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 633, + "dna_seq_len": 501, + "dna_seq_hash": "532742ae95c046241789d79e68e30b7a", + "aa_seq_len": 167, + "aa_seq_hash": "fff51d2396f3da88a775416b4c6d14b6", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + 
"locus_type": "mlst" + }, + "14": { + "seq_id": 14, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 316, + "dna_seq_len": 432, + "dna_seq_hash": "3922f6256f2891400db415013eb0b208", + "aa_seq_len": 144, + "aa_seq_hash": "0af9d546dfcaf93373a8919df3e30323", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "15": { + "seq_id": 15, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 343, + "dna_seq_len": 432, + "dna_seq_hash": "f76c13e33ad5b502dfe64181dbdf2378", + "aa_seq_len": 144, + "aa_seq_hash": "32484f065f9013aaa5b3c694cc99cdbf", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "16": { + "seq_id": 16, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 472, + "dna_seq_len": 438, + "dna_seq_hash": "80bea3abd165ee14e51bc9e9779fc6a1", + "aa_seq_len": 146, + "aa_seq_hash": "4e9cc2d289f1c946738cc8e6e4ef1186", + "dna_min_len": 306, + "dna_max_len": 744, + "aa_min_len": 102, + "aa_max_len": 248, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "17": { + "seq_id": 17, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 489, + "dna_seq_len": 432, + "dna_seq_hash": "83a314185d9ff0bf7c2953d30979e7eb", + 
"aa_seq_len": 144, + "aa_seq_hash": "5f9fc3707789543f2f14b0f1a555a05c", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "18": { + "seq_id": 18, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 497, + "dna_seq_len": 432, + "dna_seq_hash": "c70622b317de74bdaf57eb8bb5134537", + "aa_seq_len": 144, + "aa_seq_hash": "56b3d46d3e517eb7f83f089f9ed5ae2a", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "19": { + "seq_id": 19, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 498, + "dna_seq_len": 432, + "dna_seq_hash": "f284b11b34de688e2ef54c1b73936595", + "aa_seq_len": 144, + "aa_seq_hash": "da558cdebd900031d0df8f58ef01454e", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "20": { + "seq_id": 20, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "9f762c246c542c52c94c5022ca62311c", + "aa_seq_len": 167, + "aa_seq_hash": "447381a0d286fa1037b5499e2242819a", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": 
"mlst" + }, + "21": { + "seq_id": 21, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 754, + "dna_seq_len": 501, + "dna_seq_hash": "65b434bea0d1939d2b748dbc5dd6df8b", + "aa_seq_len": 167, + "aa_seq_hash": "2b685aa7892794b69c9faa20c58a9183", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "22": { + "seq_id": 22, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 757, + "dna_seq_len": 501, + "dna_seq_hash": "eccfc35078428e44e5dd3e85d9ebf1fe", + "aa_seq_len": 167, + "aa_seq_hash": "35fa89ee4cd8689b89d553157471afe0", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "23": { + "seq_id": 23, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 759, + "dna_seq_len": 501, + "dna_seq_hash": "ce01d780cd0ffe3197f708d7048a473b", + "aa_seq_len": 167, + "aa_seq_hash": "bc0edd26ea6032cc4939e8cbc17a12d3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "24": { + "seq_id": 24, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 768, + "dna_seq_len": 501, + "dna_seq_hash": "23377e95fe00bf6a16b51fe8929a938a", + "aa_seq_len": 167, + "aa_seq_hash": 
"9fb34628ef67396ed38c755280e04f7e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "25": { + "seq_id": 25, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 838, + "dna_seq_len": 501, + "dna_seq_hash": "8478cdd016753651cd73afc4ad20c7df", + "aa_seq_len": 167, + "aa_seq_hash": "6512669779521a6792ecdae3088467f7", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "26": { + "seq_id": 26, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 907, + "dna_seq_len": 501, + "dna_seq_hash": "ab935d39fffeff601d95a8362ba454f3", + "aa_seq_len": 167, + "aa_seq_hash": "1c277aef51e883e29ee8b489c525ea1b", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "27": { + "seq_id": 27, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 24, + "dna_seq_len": 399, + "dna_seq_hash": "a7af783dc7084f1b8bc593aa29f80003", + "aa_seq_len": 133, + "aa_seq_hash": "46a0c532edb92303b1b9d12a80056a60", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + 
"28": { + "seq_id": 28, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 48, + "dna_seq_len": 399, + "dna_seq_hash": "9fb313e6232b0d0e14d2fc4be7c409f7", + "aa_seq_len": 133, + "aa_seq_hash": "0e56efdd1f7fbaf132524616e29d98ca", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "29": { + "seq_id": 29, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 317, + "dna_seq_len": 399, + "dna_seq_hash": "50cd750e2f6860dd489040f1d5f64f9b", + "aa_seq_len": 133, + "aa_seq_hash": "18e887a66ce56a930dbf8db48b406596", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "30": { + "seq_id": 30, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 487, + "dna_seq_len": 399, + "dna_seq_hash": "0e1384e36f3897f65690f9230d2bcd73", + "aa_seq_len": 133, + "aa_seq_hash": "20c9a488aa6542257a151ced866d2f8f", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "31": { + "seq_id": 31, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 608, 
+ "dna_seq_len": 399, + "dna_seq_hash": "e180fd1852382c132851674a9e379c03", + "aa_seq_len": 133, + "aa_seq_hash": "c7da76b50946241fe125348a19a9b6a3", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "32": { + "seq_id": 32, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 611, + "dna_seq_len": 399, + "dna_seq_hash": "0ec842f985e93041c928ab7bb137295d", + "aa_seq_len": 133, + "aa_seq_hash": "be3990f2abaa8780b14e62d4fc8cd82a", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "33": { + "seq_id": 33, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 612, + "dna_seq_len": 399, + "dna_seq_hash": "9d42e484ea2936f87312f07abf0ad84a", + "aa_seq_len": 133, + "aa_seq_hash": "7af624e3930c7a5ab7785b08d925081c", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "34": { + "seq_id": 34, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 619, + "dna_seq_len": 399, + "dna_seq_hash": "02949c6f858f3cc5de1b13c9f5a40705", + "aa_seq_len": 133, + "aa_seq_hash": "52d120d4090a22e450633e01e4ccb729", + "dna_min_len": 279, + "dna_max_len": 678, + 
"aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "35": { + "seq_id": 35, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 631, + "dna_seq_len": 315, + "dna_seq_hash": "c4715d7df9a9eebfe5a334dd55ee469b", + "aa_seq_len": 105, + "aa_seq_hash": "31aa38918b303bf67374188e11413e59", + "dna_min_len": 220, + "dna_max_len": 535, + "aa_min_len": 73, + "aa_max_len": 178, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "36": { + "seq_id": 36, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 724, + "dna_seq_len": 399, + "dna_seq_hash": "782d08e7ee8a031a1402020e708bfbbc", + "aa_seq_len": 133, + "aa_seq_hash": "b5f9063808b8be839e7f169bf73c88e4", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "37": { + "seq_id": 37, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "481b6454f33fae7875b4978c14094ec3", + "aa_seq_len": 167, + "aa_seq_hash": "fa04457773c66ae015014e915af2516d", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "38": { + "seq_id": 38, + 
"locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 30, + "dna_seq_len": 501, + "dna_seq_hash": "79048d21794195277a6af839be13e6e1", + "aa_seq_len": 167, + "aa_seq_hash": "186c53cb5c2bf0b7ecac853c6067065d", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "39": { + "seq_id": 39, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 281, + "dna_seq_len": 501, + "dna_seq_hash": "f10d273aa97d5556a43b96721d666975", + "aa_seq_len": 167, + "aa_seq_hash": "4172d5e8c8265884fe5479e10527cb02", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "40": { + "seq_id": 40, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 399, + "dna_seq_len": 501, + "dna_seq_hash": "1839775cc7c29412648ec7b004e1a417", + "aa_seq_len": 167, + "aa_seq_hash": "c4cfbbf5c5814829188f4f404f312bd3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "41": { + "seq_id": 41, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 571, + "dna_seq_len": 501, + "dna_seq_hash": "fce3e68952108e415579b3ad24a3f150", + 
"aa_seq_len": 167, + "aa_seq_hash": "43372b6526524f5ed4542be83b5b8614", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "42": { + "seq_id": 42, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 686, + "dna_seq_len": 501, + "dna_seq_hash": "629ea0cbfe0d2e9f34b1ca034a6c55fd", + "aa_seq_len": 167, + "aa_seq_hash": "c4cfbbf5c5814829188f4f404f312bd3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "43": { + "seq_id": 43, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "eaec644b411bd0b3ab1e086fbabd29c9", + "aa_seq_len": 167, + "aa_seq_hash": "bfe756f2f421db752907a171f3a44d69", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "44": { + "seq_id": 44, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 31, + "dna_seq_len": 501, + "dna_seq_hash": "97e4acce4e840b1c48de51f55fccf620", + "aa_seq_len": 167, + "aa_seq_hash": "be9296cb1ea9443fb43c0f967d107988", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, 
+ "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "45": { + "seq_id": 45, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 208, + "dna_seq_len": 501, + "dna_seq_hash": "fbc6cb34cddfb1fe6a7806d5f7613259", + "aa_seq_len": 167, + "aa_seq_hash": "b788ec581475c9ba71d997b2db6e1def", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "46": { + "seq_id": 46, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 630, + "dna_seq_len": 501, + "dna_seq_hash": "ce58c0cacd4e8d9fa4867d11f2add864", + "aa_seq_len": 167, + "aa_seq_hash": "c062c5c88bdebdf2883e06fe6823c71c", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "47": { + "seq_id": 47, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 631, + "dna_seq_len": 501, + "dna_seq_hash": "949426df5430f94547459d06c786d77b", + "aa_seq_len": 167, + "aa_seq_hash": "dac50e2b5df83fe87c9826ecf99d568e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "48": { + "seq_id": 48, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate 
kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 632, + "dna_seq_len": 501, + "dna_seq_hash": "9a187a6b3e4675fe12ea213c7a23577c", + "aa_seq_len": 167, + "aa_seq_hash": "6536824faaa7880cfb44a6cd1ed057c9", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "49": { + "seq_id": 49, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 633, + "dna_seq_len": 501, + "dna_seq_hash": "7be8b9732228c1f82630b547d7011a5e", + "aa_seq_len": 167, + "aa_seq_hash": "1eac2cb94b8f619df1c9b0f3369f4a96", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "50": { + "seq_id": 50, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 637, + "dna_seq_len": 501, + "dna_seq_hash": "1895acdf991b49a885873fe82ce9ca85", + "aa_seq_len": 167, + "aa_seq_hash": "9fe9521d0bf495570a0fd425c0e48764", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "51": { + "seq_id": 51, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 638, + "dna_seq_len": 501, + "dna_seq_hash": "9776bbec78b5214d3dfca0d32b395d4b", + "aa_seq_len": 167, + "aa_seq_hash": 
"2914d167cc3579348e36d16afc628a39", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "52": { + "seq_id": 52, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 748, + "dna_seq_len": 501, + "dna_seq_hash": "6cf9d69644c819d9ecd3a0fd090977fc", + "aa_seq_len": 167, + "aa_seq_hash": "cf0168a601a4f5792c7326a2da650edb", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + } + } +} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/pass_one_db/results.json b/locidex/example/manifest_in/passes/pass_one_db/results.json new file mode 100644 index 0000000..5252454 --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_one_db/results.json @@ -0,0 +1,14 @@ +{ + "analysis_start_time": "2024-04-04 14:12:12", + "parameters": { + "input_file": "locidex/example/build_db_mlst_in/senterica.mlst.txt", + "outdir": "/tmp/pytest-of-mwells/pytest-82/build0", + "name": "Locidex Database", + "db_ver": "1.0.0", + "db_desc": "", + "author": "", + "date": "", + "force": true + }, + "analysis_end_time": "2024-04-04 14:12:12" +} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/run.json b/locidex/example/manifest_in/passes/run.json new file mode 100644 index 0000000..41cee63 --- /dev/null +++ b/locidex/example/manifest_in/passes/run.json @@ -0,0 +1,7 @@ +{ + "analysis_start_time": "25/04/2024 14:12:06", + "parameters": { + "input": "./locidex/example/manifest_in/passes/" + }, + "analysis_end_time": "25/04/2024 14:12:06" +} \ No newline at end of file diff 
--git a/locidex/example/manifest_out/manifest.json b/locidex/example/manifest_out/manifest.json new file mode 100644 index 0000000..c4376a8 --- /dev/null +++ b/locidex/example/manifest_out/manifest.json @@ -0,0 +1,8 @@ +{ + "Locidex Database": { + "1.0.0": { + "db_relative_path_dir": "/build_db_mlst_out", + "db_relative_path_config": "/build_db_mlst_out/config.json" + } + } +} \ No newline at end of file diff --git a/locidex/example/manifest_out/run.json b/locidex/example/manifest_out/run.json new file mode 100644 index 0000000..a8dd4ab --- /dev/null +++ b/locidex/example/manifest_out/run.json @@ -0,0 +1,7 @@ +{ + "analysis_start_time": "25/04/2024 12:43:50", + "parameters": { + "input": "./locidex/example/manifest_in/passes/" + }, + "analysis_end_time": "25/04/2024 12:43:50" +} \ No newline at end of file diff --git a/locidex/manifest.py b/locidex/manifest.py index 0e31e40..300b3d1 100644 --- a/locidex/manifest.py +++ b/locidex/manifest.py @@ -22,6 +22,7 @@ def run_merge(config): #Input Parameters input_dir = config['input'] in_dirname = input_dir.split('/')[-1] + run_data = {} run_data['analysis_start_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") run_data['parameters'] = analysis_parameters @@ -52,13 +53,13 @@ def run_merge(config): continue for field in db_keys: if not field in c: - print(f'Error db config: {fpath} is missing a needed field key for {field}, please set one') - sys.exit() + print(f'Error db config: {fpath} is missing a needed field key for {field}, please set one', file=sys.stderr) + raise KeyError v = c[field] if v == '': - print(f'Error db config: {fpath} is missing a needed field value for {field}, please set one') - sys.exit() + print(f'Error db config: {fpath} is missing a needed field value for {field}, please set one', file=sys.stderr) + raise KeyError db_name = str(c['db_name']) db_version = str(c['db_version']) @@ -66,12 +67,14 @@ def run_merge(config): config_files[db_name] = {} if db_version in config_files[db_name]: 
print(f"Error you are trying to populate duplicate entries for db_name {db_name} and version {db_version}. \ - Manifest only supports distinct db_entries, please resolve duplicates") + Manifest only supports distinct db_entries, please resolve duplicates", file=sys.stderr) sys.exit() config_files[db_name][db_version] = { - 'db_relative_path_dir': f"{in_dirname}/{dirname}", - 'db_relative_path_config': f"{in_dirname}/{dirname}/config.json", + #'db_relative_path_dir': f"{in_dirname}/{dirname}", + 'db_relative_path_dir': os.path.join(in_dirname, dirname), + #'db_relative_path_config': f"{in_dirname}/{dirname}/config.json", + 'db_relative_path_config': os.path.join(in_dirname, dirname, "config.json"), } with open(os.path.join(input_dir,"manifest.json"),'w' ) as fh: diff --git a/locidex/search.py b/locidex/search.py index 513d14f..f4c1213 100644 --- a/locidex/search.py +++ b/locidex/search.py @@ -155,8 +155,8 @@ def run_search(config): db_dir_rel_path = manifest[db_name][db_version]['db_relative_path_dir'].split('/') if db_dir_prefix[-1] == db_dir_rel_path[0]: db_dir_prefix = db_dir_prefix[0:-1] - db_dir_prefix = "/".join(db_dir_prefix) - db_dir_rel_path = "/".join(db_dir_rel_path ) + db_dir_prefix = os.path.join(db_dir_prefix) + db_dir_rel_path = os.path.join(db_dir_rel_path) db_dir = os.path.join(db_dir_prefix,db_dir_rel_path) if not os.path.isdir(db_dir): From a40b9134833aa8e42ac007184e1887ed46b83b6b Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Mon, 29 Apr 2024 14:58:12 -0500 Subject: [PATCH 32/51] updated db config to dataclass --- locidex/build.py | 48 +++++++++++++++++++++--------------- locidex/constants.py | 58 +++++++++++++++++++++++++++++++++++--------- locidex/manifest.py | 10 ++------ tests/test_db.py | 5 ++-- 4 files changed, 79 insertions(+), 42 deletions(-) diff --git a/locidex/build.py b/locidex/build.py index 8b31fa6..926d9b3 100644 --- a/locidex/build.py +++ b/locidex/build.py @@ -4,8 +4,9 @@ import os, sys from argparse import (ArgumentParser, 
ArgumentDefaultsHelpFormatter, RawDescriptionHelpFormatter) from locidex.version import __version__ -from locidex.constants import FORMAT_RUN_DATA, DB_CONFIG_FIELDS +from locidex.constants import FORMAT_RUN_DATA from locidex.classes import run_command +from locidex.constants import DBConfig class locidex_build: input_file = None @@ -21,7 +22,7 @@ class locidex_build: messages = [] - def __init__(self, input_file, outdir,config={},seq_columns={'nucleotide':'dna_seq','protein':'aa_seq'},force=False,parse_seqids=False): + def __init__(self, input_file: os.PathLike, outdir: os.PathLike, config: DBConfig,seq_columns={'nucleotide':'dna_seq','protein':'aa_seq'},force=False,parse_seqids=False): self.input_file = input_file self.outdir = outdir self.force = force @@ -37,7 +38,7 @@ def __init__(self, input_file, outdir,config={},seq_columns={'nucleotide':'dna_s self.df = self.read_data( self.input_file) self.config = config - self.config["db_num_seqs"] = len(self.df) + self.config.db_num_seqs = len(self.df) for t in seq_columns: col_name = seq_columns[t] @@ -46,16 +47,16 @@ def __init__(self, input_file, outdir,config={},seq_columns={'nucleotide':'dna_s outfile = os.path.join(self.blast_dir, t) if t == 'nucleotide': self.is_dna = True - self.config["nucleotide_db_name"] = t + self.config.nucleotide_db_name = t blast_method = 'nucl' elif t == 'protein': self.is_protein = True - self.config["protein_db_name"] = t + self.config.protein_db_name = t blast_method = 'prot' self.create_seq_db(t, col_name, outfile, blast_method) - self.config["is_nucl"] = self.is_dna - self.config["is_prot"] = self.is_protein + self.config.is_nucl = self.is_dna + self.config.is_prot = self.is_protein self.get_metadata(self.df,columns_to_exclude=list(seq_columns.values())) def create_seq_db(self,stype,col_name,outfile,blast_method='nucl'): @@ -144,8 +145,6 @@ def add_args(parser=None): parser.add_argument('-o', '--outdir', type=str, required=True, help='Output directory to put results') 
parser.add_argument('-n', '--name', type=str, required=False, help='DB name',default='Locidex Database') parser.add_argument('-a', '--author', type=str, required=False, help='Author Name for Locidex Database',default='') - parser.add_argument('-d', '--date', type=str, required=False, help='Creation date for Locidex Database', - default='') parser.add_argument('-c', '--db_ver', type=str, required=False, help='Version code for locidex db', default='1.0.0') parser.add_argument('-e', '--db_desc',type=str, required=False, help='Version code for locidex db', @@ -170,16 +169,25 @@ def run(cmd_args=None): run_data['analysis_start_time'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") run_data['parameters'] = vars(cmd_args) - config = {} - for f in DB_CONFIG_FIELDS: - config[f] = '' + #config = {} + #for f in DB_CONFIG_FIELDS: + # config[f] = '' + + #config["db_name"] = cmd_args.name + #config["db_version"] = cmd_args.db_ver + #config["db_desc"] = cmd_args.db_desc + #config["db_author"] = cmd_args.author + #if cmd_args.date == '': + # config["db_date"] = datetime.now().strftime("%Y/%d/%m") - config["db_name"] = cmd_args.name - config["db_version"] = cmd_args.db_ver - config["db_desc"] = cmd_args.db_desc - config["db_author"] = cmd_args.author - if cmd_args.date == '': - config["db_date"] = datetime.now().strftime("%Y/%d/%m") + config = DBConfig( + db_name=cmd_args.name, + db_version =cmd_args.db_ver, + db_desc=cmd_args.db_desc, + db_author=cmd_args.author, + db_date=datetime.now().strftime("%Y/%d/%m"), + + ) if not os.path.isfile(input_file): print(f'Error {input_file} does not exist, please check path and try again') @@ -188,14 +196,14 @@ def run(cmd_args=None): #run_data['result_file'] = os.path.join(outdir) obj = locidex_build(input_file, outdir,config=config,seq_columns={'nucleotide':'dna_seq','protein':'aa_seq'},force=force) - print(outdir) + if obj.status == False: print(f'Error something went wrong building the db, check error messages {obj.messages}') sys.exit() 
run_data['analysis_end_time'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") with open(os.path.join(outdir,"config.json"),"w") as oh: - oh.write(json.dumps(obj.config,indent=4)) + oh.write(json.dumps(obj.config.to_dict(),indent=4)) with open(os.path.join(outdir,"meta.json"),"w") as oh: oh.write(json.dumps(obj.meta,indent=4)) diff --git a/locidex/constants.py b/locidex/constants.py index b8288ea..1caf7eb 100644 --- a/locidex/constants.py +++ b/locidex/constants.py @@ -1,3 +1,7 @@ + +from dataclasses import dataclass, asdict, fields +from typing import Any, Union + DNA_AMBIG_CHARS = ['b', 'd', 'e', 'f', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 'u', 'v', 'w', 'x', 'y', 'z', '-'] DNA_IUPAC_CHARS = ['b', 'd', 'e', 'f', 'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q', 'r', 's', 'u', 'v', 'w', 'x', 'y', @@ -59,18 +63,48 @@ EXTRACT_MODES = ['snps','trim','raw','extend'] -DB_CONFIG_FIELDS = [ - "db_name", - "db_version", - "db_date", - "db_author", - "db_desc", - "db_num_seqs", - "is_nucl", - "is_prot", - "nucleotide_db_name", - "protein_db_name", -] +@dataclass +class DBConfig: + db_name: Union[str, None] = None + db_version: Union[str, None] = None + db_date: Union[str, None] = None + db_author: Union[str, None] = None + db_desc: Union[str, None] = None + db_num_seqs: Union[str, int] = None + is_nucl: Union[bool, None] = None + is_prot: Union[bool, None] = None + nucleotide_db_name: Union[str, None] = None + protein_db_name: Union[str, None] = None + + def __getitem__(self, name: str) -> Any: + return getattr(self, str(name)) + + def __setitem__(self, key: str, value: str) -> None: + setattr(self, key, value) + + def to_dict(self) -> dict: + return asdict(self) + + @classmethod + def _keys(cls) -> list: + return [i.name for i in fields(cls)] + + def keys(self) -> list: + return [i.name for i in fields(self)] + + +#DB_CONFIG_FIELDS = [ +# "db_name", +# "db_version", +# "db_date", +# "db_author", +# "db_desc", +# "db_num_seqs", +# "is_nucl", +# "is_prot", +# 
"nucleotide_db_name", +# "protein_db_name", +#] SEARCH_RUN_DATA = { diff --git a/locidex/manifest.py b/locidex/manifest.py index 300b3d1..ccaa704 100644 --- a/locidex/manifest.py +++ b/locidex/manifest.py @@ -6,6 +6,7 @@ from argparse import (ArgumentParser, ArgumentDefaultsHelpFormatter, RawDescriptionHelpFormatter) from datetime import datetime from locidex.version import __version__ +from locidex.constants import DBConfig def add_args(parser=None): @@ -27,14 +28,7 @@ def run_merge(config): run_data['analysis_start_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") run_data['parameters'] = analysis_parameters - db_keys = [ - "db_name", - "db_version", - "db_date", - "db_author", - "db_desc", - "db_num_seqs", - ] + db_keys = DBConfig.keys() d = pathlib.Path(input_dir).rglob('*') config_files = {} diff --git a/tests/test_db.py b/tests/test_db.py index 668a016..7764d84 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -97,7 +97,7 @@ def temp_db_dir(tmp_path): protein_dir.mkdir() # Create mock files - config_file.write_text(json.dumps({i: "value" for i in constants.DB_CONFIG_FIELDS})) + config_file.write_text(json.dumps({i: "value" for i in constants.DBConfig._keys()})) meta_file.write_text(json.dumps({"meta": "data"})) (nucleotide_dir / "nucleotide").touch() (protein_dir / "protein").touch() @@ -112,7 +112,8 @@ def test_search_db_conf_initialization_and_blast_paths_setup(temp_db_dir): "meta": "db_meta.json" } #required_fields = ["key"] - required_fields = [*constants.DB_CONFIG_FIELDS] + #required_fields = [*constants.DB_CONFIG_FIELDS] + required_fields = [*constants.DBConfig._keys()] # Initialize search_db_conf search_conf = search_db_conf(str(temp_db_dir), db_basenames, required_fields) From b268bc4d426f31d9d0c71f93d412f6c69b556bec Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Mon, 29 Apr 2024 17:01:32 -0500 Subject: [PATCH 33/51] Added refactor for database config class The Databaseconfig fields have been refactored into a dataclass allowing for 
continuity of the requried fields across the modules, a larger upgrade is still needed --- locidex/constants.py | 29 +- .../example/manifest_in/passes/manifest.json | 8 - .../blast/nucleotide/nucleotide.fasta | 0 .../blast/nucleotide/nucleotide.ndb | Bin .../blast/nucleotide/nucleotide.nhr | Bin .../blast/nucleotide/nucleotide.nin | Bin .../blast/nucleotide/nucleotide.njs | 0 .../blast/nucleotide/nucleotide.not | Bin .../blast/nucleotide/nucleotide.nsq | Bin .../blast/nucleotide/nucleotide.ntf | Bin .../blast/nucleotide/nucleotide.nto | Bin .../pass_one_db/blast/protein/protein.fasta | 0 .../pass_multiple/pass_one_db/config.json | 12 + .../{ => pass_multiple}/pass_one_db/meta.json | 0 .../pass_one_db/results.json | 0 .../blast/nucleotide/nucleotide.fasta | 106 ++ .../blast/nucleotide/nucleotide.ndb | Bin 0 -> 20480 bytes .../blast/nucleotide/nucleotide.nhr | Bin 0 -> 3435 bytes .../blast/nucleotide/nucleotide.nin | Bin 0 -> 800 bytes .../blast/nucleotide/nucleotide.njs | 22 + .../blast/nucleotide/nucleotide.not | Bin 0 -> 644 bytes .../blast/nucleotide/nucleotide.nsq | Bin 0 -> 6297 bytes .../blast/nucleotide/nucleotide.ntf | Bin 0 -> 16384 bytes .../blast/nucleotide/nucleotide.nto | Bin 0 -> 216 bytes .../pass_three_db/blast/protein/protein.fasta | 106 ++ .../pass_multiple/pass_three_db/config.json | 12 + .../pass_multiple/pass_three_db/meta.json | 1181 +++++++++++++++++ .../pass_multiple/pass_three_db/results.json | 14 + .../blast/nucleotide/nucleotide.fasta | 106 ++ .../blast/nucleotide/nucleotide.ndb | Bin 0 -> 20480 bytes .../blast/nucleotide/nucleotide.nhr | Bin 0 -> 3435 bytes .../blast/nucleotide/nucleotide.nin | Bin 0 -> 800 bytes .../blast/nucleotide/nucleotide.njs | 22 + .../blast/nucleotide/nucleotide.not | Bin 0 -> 644 bytes .../blast/nucleotide/nucleotide.nsq | Bin 0 -> 6297 bytes .../blast/nucleotide/nucleotide.ntf | Bin 0 -> 16384 bytes .../blast/nucleotide/nucleotide.nto | Bin 0 -> 216 bytes .../pass_two_db/blast/protein/protein.fasta | 106 ++ 
.../pass_multiple/pass_two_db/config.json | 12 + .../pass_multiple/pass_two_db/meta.json | 1181 +++++++++++++++++ .../pass_multiple/pass_two_db/results.json | 14 + .../blast/nucleotide/nucleotide.fasta | 106 ++ .../blast/nucleotide/nucleotide.ndb | Bin 0 -> 20480 bytes .../blast/nucleotide/nucleotide.nhr | Bin 0 -> 3435 bytes .../blast/nucleotide/nucleotide.nin | Bin 0 -> 800 bytes .../blast/nucleotide/nucleotide.njs | 22 + .../blast/nucleotide/nucleotide.not | Bin 0 -> 644 bytes .../blast/nucleotide/nucleotide.nsq | Bin 0 -> 6297 bytes .../blast/nucleotide/nucleotide.ntf | Bin 0 -> 16384 bytes .../blast/nucleotide/nucleotide.nto | Bin 0 -> 216 bytes .../pass_one_db/blast/protein/protein.fasta | 106 ++ .../{ => pass_single}/pass_one_db/config.json | 0 .../passes/pass_single/pass_one_db/meta.json | 1181 +++++++++++++++++ .../pass_single/pass_one_db/results.json | 14 + locidex/example/manifest_in/passes/run.json | 7 - locidex/manifest.py | 230 ++-- tests/test_manifest.py | 94 ++ 57 files changed, 4580 insertions(+), 111 deletions(-) delete mode 100644 locidex/example/manifest_in/passes/manifest.json rename locidex/example/manifest_in/passes/{ => pass_multiple}/pass_one_db/blast/nucleotide/nucleotide.fasta (100%) rename locidex/example/manifest_in/passes/{ => pass_multiple}/pass_one_db/blast/nucleotide/nucleotide.ndb (100%) rename locidex/example/manifest_in/passes/{ => pass_multiple}/pass_one_db/blast/nucleotide/nucleotide.nhr (100%) rename locidex/example/manifest_in/passes/{ => pass_multiple}/pass_one_db/blast/nucleotide/nucleotide.nin (100%) rename locidex/example/manifest_in/passes/{ => pass_multiple}/pass_one_db/blast/nucleotide/nucleotide.njs (100%) rename locidex/example/manifest_in/passes/{ => pass_multiple}/pass_one_db/blast/nucleotide/nucleotide.not (100%) rename locidex/example/manifest_in/passes/{ => pass_multiple}/pass_one_db/blast/nucleotide/nucleotide.nsq (100%) rename locidex/example/manifest_in/passes/{ => 
pass_multiple}/pass_one_db/blast/nucleotide/nucleotide.ntf (100%) rename locidex/example/manifest_in/passes/{ => pass_multiple}/pass_one_db/blast/nucleotide/nucleotide.nto (100%) rename locidex/example/manifest_in/passes/{ => pass_multiple}/pass_one_db/blast/protein/protein.fasta (100%) create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_one_db/config.json rename locidex/example/manifest_in/passes/{ => pass_multiple}/pass_one_db/meta.json (100%) rename locidex/example/manifest_in/passes/{ => pass_multiple}/pass_one_db/results.json (100%) create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.fasta create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.ndb create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.nhr create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.nin create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.njs create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.not create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.nsq create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.ntf create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.nto create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/protein/protein.fasta create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_three_db/config.json create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_three_db/meta.json create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_three_db/results.json create mode 
100644 locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.fasta create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.ndb create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.nhr create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.nin create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.njs create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.not create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.nsq create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.ntf create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.nto create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/protein/protein.fasta create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_two_db/config.json create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_two_db/meta.json create mode 100644 locidex/example/manifest_in/passes/pass_multiple/pass_two_db/results.json create mode 100644 locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.fasta create mode 100644 locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.ndb create mode 100644 locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.nhr create mode 100644 locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.nin create mode 100644 locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.njs create mode 100644 
locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.not create mode 100644 locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.nsq create mode 100644 locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.ntf create mode 100644 locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.nto create mode 100644 locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/protein/protein.fasta rename locidex/example/manifest_in/passes/{ => pass_single}/pass_one_db/config.json (100%) create mode 100644 locidex/example/manifest_in/passes/pass_single/pass_one_db/meta.json create mode 100644 locidex/example/manifest_in/passes/pass_single/pass_one_db/results.json delete mode 100644 locidex/example/manifest_in/passes/run.json create mode 100644 tests/test_manifest.py diff --git a/locidex/constants.py b/locidex/constants.py index 1caf7eb..49f0f8e 100644 --- a/locidex/constants.py +++ b/locidex/constants.py @@ -1,6 +1,7 @@ from dataclasses import dataclass, asdict, fields -from typing import Any, Union +import pathlib +from typing import Any, Union, NamedTuple DNA_AMBIG_CHARS = ['b', 'd', 'e', 'f', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 'u', 'v', 'w', 'x', 'y', 'z', '-'] @@ -92,19 +93,21 @@ def _keys(cls) -> list: def keys(self) -> list: return [i.name for i in fields(self)] +@dataclass(frozen=True) +class DBFiles: + meta_file: str = "meta.json" + config_file: str = "config.json" + results_file: str = "results.json" + blast_dir: str = "blast" -#DB_CONFIG_FIELDS = [ -# "db_name", -# "db_version", -# "db_date", -# "db_author", -# "db_desc", -# "db_num_seqs", -# "is_nucl", -# "is_prot", -# "nucleotide_db_name", -# "protein_db_name", -#] + @classmethod + def items(cls): + return [(i.name, pathlib.Path(i.default)) for i in fields(cls)] + +@dataclass(frozen=True) +class ManifestFields: + db_path: str = "path" + config_data: str = 
"config" SEARCH_RUN_DATA = { diff --git a/locidex/example/manifest_in/passes/manifest.json b/locidex/example/manifest_in/passes/manifest.json deleted file mode 100644 index 0bb50a4..0000000 --- a/locidex/example/manifest_in/passes/manifest.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "Locidex Database": { - "1.0.0": { - "db_relative_path_dir": "pass_one_db", - "db_relative_path_config": "pass_one_db/config.json" - } - } -} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.fasta b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.fasta similarity index 100% rename from locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.fasta rename to locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.fasta diff --git a/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.ndb b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.ndb similarity index 100% rename from locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.ndb rename to locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.ndb diff --git a/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nhr b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.nhr similarity index 100% rename from locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nhr rename to locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.nhr diff --git a/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nin b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.nin similarity index 100% rename from locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nin rename to 
locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.nin diff --git a/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.njs b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.njs similarity index 100% rename from locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.njs rename to locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.njs diff --git a/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.not b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.not similarity index 100% rename from locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.not rename to locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.not diff --git a/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nsq b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.nsq similarity index 100% rename from locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nsq rename to locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.nsq diff --git a/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.ntf b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.ntf similarity index 100% rename from locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.ntf rename to locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.ntf diff --git a/locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nto b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.nto similarity index 100% rename from 
locidex/example/manifest_in/passes/pass_one_db/blast/nucleotide/nucleotide.nto rename to locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/nucleotide/nucleotide.nto diff --git a/locidex/example/manifest_in/passes/pass_one_db/blast/protein/protein.fasta b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/protein/protein.fasta similarity index 100% rename from locidex/example/manifest_in/passes/pass_one_db/blast/protein/protein.fasta rename to locidex/example/manifest_in/passes/pass_multiple/pass_one_db/blast/protein/protein.fasta diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/config.json b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/config.json new file mode 100644 index 0000000..331ee22 --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/config.json @@ -0,0 +1,12 @@ +{ + "db_name": "Locidex Database 1", + "db_version": "1.0.0", + "db_date": "04/04/2024", + "db_author": "test1", + "db_desc": "test1", + "db_num_seqs": 53, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" +} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/pass_one_db/meta.json b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/meta.json similarity index 100% rename from locidex/example/manifest_in/passes/pass_one_db/meta.json rename to locidex/example/manifest_in/passes/pass_multiple/pass_one_db/meta.json diff --git a/locidex/example/manifest_in/passes/pass_one_db/results.json b/locidex/example/manifest_in/passes/pass_multiple/pass_one_db/results.json similarity index 100% rename from locidex/example/manifest_in/passes/pass_one_db/results.json rename to locidex/example/manifest_in/passes/pass_multiple/pass_one_db/results.json diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.fasta 
b/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.fasta new file mode 100644 index 0000000..a03cb89 --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.fasta @@ -0,0 +1,106 @@ +>0 +AAATTCCGTCCCGGACATGCGGACTACACCTATCACCAAAAATACGGTGTGCGAGATTACCGTGGCGGCGGCCGTTCATCGGCACGTGAAACCGCCATGCGTGTTGCTGCGGGAGCGATTGCCAAAAAATATCTGCAGCAAGAGTTTGGCATTGAAGTGCGTGCTTACTTGTCGCAAATGGGGGATGTCGCGATTGATAAAGTGGATTGGAATGAGATTGAAAACAACGATTTCTTCTGTCCTGATGTCGATAAAGTGGCTGCGTTTGACGAGCTGATCCGCGAGCTGAAAAAAGAAGGCGATTCGATCGGCGCGAAAATCCAAGTGGTCGCTACAGGCGTGCCGGTTGGACTGGGTGAGCCTGTGTTTGATCGCTTAGATGCGGATATTGCCCATGCCTTGATGAGCATCAACGCCGTGAAAGGAGTCGAGATTGGTGATGGCTTTGATGTGGTGCGCCAAAAAGGCAGCCAACACCGTGACCCGCTCACTCCACAAGGT +>1 +GTTTTCCGCCCGGGCCATGCCGACTATACCTACGAGCAGAAATACGGTCTGCGCGATTACCGTGGCGGCGGTCGTTCTTCCGCCCGTGAAACGGCGATGCGCGTCGCGGCTGGCGCGATTGCTAAAAAATATCTGGCGGAGAAACACGGCATCGTCATTCAGGGGTGTCTGACCCAGATGGGCGATATTCCGCTTGAAATCAAAGACTGGCAGCAGGTTGAACAAAACCCGTTTTTCTGTCCTGATCCAGATAAAATCGACGCGCTGGATGAACTGATGCGCGCCCTGAAGAAAGAGGGCGATTCGATTGGGGCAAAAGTGACCGTCGTGGCAAACGGCGTTCCGGCCGGGCTTGGCGAACCGGTCTTTGACCGTCTGGATGCGGACATCGCTCATGCGCTGATGAGCATCAACGCGGTAAAAGGCGTGGAGATTGGCGATGGGTTTGATGTGGTCGCGTTGCGAGGCAGCCAGAATCGCGATGAAATTACCAAAGAGGGC +>2 +GTTTTCCGTCCAGGACACGCTGACTATACCTATGAGCAGAAATATGGCCTGCGCGACTACCGTGGCGGCGGACGTTCATCCGCGCGTGAAACGGCGATGCGCGTTGCGGCTGGCGCGATTGCCAAAAAATATCTGGCGGAAAAATTCGGCGTTGAAATTCGCGGCTGTCTGACGCAGATGGGGGATATTCCGCTGGAGATCAAAGACTGGTCTCAGGTGGAGCTTAACCCGTTCTTTTGTCCAGACCCGGATAAAATCGAAGTGCTGGACGAACTGATGCGCGGGCTGAAGAAAGAGGGCGACTCCATCGGGGCAAAAGTGACCGTTGTTGCAAGCGGCGTACCGGCGGGTCTCGGCGAACCTGTATTCGACCGTCTGGATGCCGACATCGCCCATGCGCTGATGAGCATTAACGCCGTTAAGGGCGTTGAGATTGGCGACGGTTTTGACGTTGTTGCGCTGCGCGGCAGTCAGAACCGCGATGAGATCACCAAAGAAGGT +>3 
+GTTTTCCGCCCAGGGCATGCTGATTATACCTATGAACAAAAATATGGTTTGCGTGATTATCGTGGTGGTGGACGTTCTTCTGCTCGTGAAACGGCAATGCGTGTCGCCGCAGGTGCGATTGCTAAAAAATATCTAAAAGAGAAATTAGGCATCGAAGTTCGAGGATATCTTTCTCAGCTAGGACCTATTACATGTGATCTTGTTGATTGGTCTATTGTTGAAAGCAATCCATTTTTCTGTCCTGATCCTTCACGTTTAGATGCGCTTGATGAATACATGCGTGCACTTAAAAAAGAAGGTAATTCTATTGGTGCAAAAGTCACTGTGGTTGCACAGGGTGTACCTGCTGGATTTGGTGAACCTGTCTTTGATCGATTAGATGCTGATTTAGCGCATGCTTTGATGAGTATCAATGCTGTCAAAGGTATAGAAATTGGTGATGGATTTGGTGTTGTAACATTAAAAGGTACAGAAAACCGAGATGAAATCACTAAAAAGGGA +>4 +GTTTTCCGTCCAGGCCATGCCGATTACACCTACGAACAAAAATACGGTCTGCGCGATTATCGCGGCGGCGGGCGCTCTTCCGCCCGCGAAACCGCCATGCGCGTGGCGGCAGGGGCGATTGCAAAAAAATATCTCGCCGAGAAATTTGGCATTGAGATTCGCGGCTGCCTGACCCAGATGGGTGACATTCCGCTGGAAATCAAAGACTGGTCGCAGGTCGAGCAAAATCCGTTTTTCTGCCCGGACCCGGACAAAATCGACGCGTTAGATGAACTGATGCGCGCGCTGAAAAAAGAGGGCGACTCCATCGGCGCGAAAGTCACCGTTGTTGCCAGTGGCGTCCCCGCCGGACTTGGCGAGCCGGTCTTTGACCGCCTGGATGCCGACATCGCCCATGCGCTGATGAGCATCAACGCGGTGAAAGGCGTAGAAATTGGTGATGGTTTTGACGTGGTGGCGCTGCGTGGCAGCCAGAACCGCGACGAAATCACCAAAGACGGT +>5 +GTTTTCCGTCCTGGTCACGCCGACTATACCTACGAACAAAAATATGGCTTTCGCGACTATCGCGGCGGCGGGCGTTCTTCCGCGCGTGAAACCGCGATGCGCGTGGCGGCAGGGGCAATTGCCAAAAAATATCTCCAGCAGAAATTCGGCATCGTTATCCGCGGCTGTCTGTCCCAGATGGGCGACATTCCGCTGGCAATCAAAGACTGGGATCAGGTAGAGCTCAACCCGTTCTTCTGCGCCGATGCCGACAAGCTGGACGCGCTGGATGAGCTGATGCGTGGCCTGAAAAAAGAGGGCGACTCCATTGGTGCGAAAGTCACCGTGGTGGCCGACGGCGTGCCGGCTGGCTGGGGCGAGCCGGTATTTGACCGCCTTGACGCCGACATCGCCCACGCGCTGATGAGCATCAACGCGGTGAAAGGCGTCGAAATCGGCGACGGTTTTGACGTGGTCAAGCTTCGCGGCAGCCAGAACCGCGACGAAATCACGAAGGCGGGT +>6 
+GTGTTCCGTCCGGGGCACGCGGATTACACCTACGAACAAAAATACGGCCTGCGCGACTATCGCGGCGGCGGGCGTTCATCCGCCCGTGAAACCGCCATGCGCGTCGCGGCAGGCGCTATCGCCAAAAAATATCTGGCGCAGAAATTCGGCGTGGTGATTCGCGGCTGCCTGACCCAGATGGGTGATATTCCGCTGGAAATCAAAGACTGGGATCAGGTAGAGCAAAACCCGTTCTTCTGCCCGGACCCGGATAAAATCGAGGCGCTGGATGAGCTGATGCGCGCTCTGAAAAAAGAGGGCGATTCCATCGGCGCGAAAGTCACCGTGGTGGCCGACAGCGTGCCCGCCGGGCTTGGCGAGCCGGTATTTGACCGCCTGGACGCCGATATCGCCCACGCGCTGATGAGCATTAACGCCGTGAAGGGCGTGGAAATCGGCGACGGTTTCGGCGTGGTGCAACTGCGCGGCAGCCAGAACCGCGACGAAATCACCACTGCCGGT +>7 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAGCCAGGCGCCACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGCGGCCTGCCGGAGGGCGCGGAGATTGCCGTTCAGTTGGAAGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGCCGCTTCTCGCTGTCTACGCTGCCTGCCGCCGATTTCCCGAATCTTGACGACTGGCAAAGCGAAGTTGAATTTACGCTGCCGCAGGCCACGATGAAGCGCCTGATTGAAGCGACCCAGTTTTCGATGGCCCATCAGGATGTGCGCTACTACTTAAACGGTATGCTGTTTGAAACGGAAGGTAGCGAACTGCGCACTGTTGCGACCGACGGCCACCGTCTGGCGGTGTGCTCAATGCCGCTGGAGGCGTCTTTACCTAGCCACTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGTGGCGAAAACCCGCTGCGCGTGCAG +>8 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAACCCGGCGCTACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCCTGCCGGAAGGGGCGGAAATCGCCGTTCAGCTGGAGGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGTCGCTTTTCGCTGTCTACCTTACCGGCAGCAGACTTCCCGAATCTGGATGACTGGCAAAGCGAAGTGGAATTCACGCTGCCTCAGGCGACGATGAAACGCTTGATTGAGGCCACCCAGTTTTCGATGGCCCATCAGGACGTGCGCTACTACCTGAACGGTATGTTGTTTGAAACGGAAGGAAGCGAACTGCGCACCGTCGCGACCGACGGCCACCGTCTGGCGGTCTGTTCAATGCCGCTGGAGGCCTCTTTACCGAGCCATTCAGTGATCGTACCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTTGACGGCGGTGAAAATCCACTGCGTGTACAG +>9 
+ATGGAAATGGTGGCGCGCGTTGCGTTGATTCAGCCTCATGAACCAGGCGCAACTACCGTCCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCTTGCCGGAAGGGGCTGAAATTGCCGTCCAGCTGGAAGGCGATCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTCTCGCTTTCCACGCTGCCTGCCGCCGATTTCCCTAATCTGGATGACTGGCAGAGCGAAGTCGAATTCACCCTGCCGCAGGCAACGATGAAGCGCCTGATTGAAGCCACCCAGTTCTCAATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAGACTGAAGGTGAAGAGTTGCGTACCGTCGCGACCGACGGTCACCGTCTGGCGGTCTGCTCTATGCCGGTCGGGCAATCTCTGCCTAACCATTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAGCTGATGCGTATGCTCGACGGCGGCGAAACCCCGCTGCGCGTACAG +>10 +ATGGAGATGGTGGCGCGCGTGGCGCTGATCCAGCCTCATGAACCTGGTGCGACCACCGTTCCGGCGCGTAAATTCTTCGATATTTGCCGTGGATTACCAGAAGGGGCGGAAATTGCCGTTCAACTGGAAGGCGACCGTATGCTGGTGCGTTCTGGCCGCAGCCGTTTCTCGCTGTCTACGCTGCCTGCCGCCGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTCGAATTCACCCTGCCACAGGCGACAATGAAGCGCCTGATTGAAGCCACGCAGTTTTCGATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAAACCGAAGGGGAAGAGTTGCGTACCGTGGCGACCGACGGTCACCGCCTGGCGGTCTGTTCAATGCCTGTCGGTCAGCCGTTGCCTAGCCATTCGGTGATCGTACCGCGTAAAGGTGTGATTGAACTGATGCGTATGCTCGACGGCGGCGATAACCCGCTGCGCGTGCAG +>11 +ATGGAAATGGTGGCACGCGTTGCGCTGGTTCAGCCGCACGAACCAGGGGCGACGACCGTTCCAGCGCGCAAATTCTTTGATATCTGCCGTGGTCTGCCTGAAGGCGCGGAAATTGCCGTGCAGCTGGAAGGTGAGCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTTTCGCTGTCTACCCTGCCAGCGGCGGATTTCCCGAATCTCGATGACTGGCAGAGCGAAGTCGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCGACCCAGTTTTCTATGGCGCATCAGGACGTTCGCTATTACTTAAACGGTATGCTGTTTGAAACCGAAGGTGAAGAACTGCGCACCGTGGCGACCGACGGCCACCGTCTGGCAGTCTGTTCAATGCCAATTGGTCAATCTTTGCCAAGCCATTCGGTGATCGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGCGGCGACAATCCGCTGCGCGTGCAG +>12 
+ATGGAAATGATCGCGCGCGTTACGCTGACTCAGCCGCACGACGCGGGCGCGACCACGGTTCCGGCACGTAAATTCTTTGATATTTGCCGTGGGCTGCCGGAAGGCGCTGAAATCGCAGTGCAGCTGGAGGGCGACCGCATGCTGGTGCGCTCTGGCCGCAGCCGTTTCTCCCTCTCCACGTTGCCCGCTGCGGACTTCCCGAACCTGGATGACTGGCAGAGCGAAGTTGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCCACGCAGTTCTCCATGGCGCATCAGGACGTTCGTTACTACTTAAACGGCATGCTGTTCGAAACCGAAGGTGAAGAGCTGCGTACCGTGGCGACCGACGGTCACCGTCTGGCGGTTTGTTCCATGCCGATTGGCGATTCACTGCCAAACCATTCGGTGATCGTACCGCGTAAAGGCGTAATTGAACTGATGCGTATGCTCGACGGCGGTGAAACGCCGCTGCGCGTGCAG +>13 +ATGGAGATGATCGCGCGTGTGGCGCTGTCGCTACCGCACCAGGCGGGCGCGACCACCGTGCCGGCGCGCAAATTCTTCGATATCTGCCGTGGCTTGCCGGAAGGGGCGGAAATCGCCGTTACGCTGGAAGGCGACAGAATGCTGGTGCGCTCCGGGCGCAGCCGCTTCTCGCTGTCTACGTTACCGGCGGCAGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTGGAGTTCACGCTCCCGCAGGCCACCATGAAGCGCCTGATCGAAGCGACCCAGTTCTCCATGGCCCATCAGGACGTGCGGTATTACCTGAACGGGATGCTGTTTGAAACCGAAGGCGAAGAGCTGCGCACCGTGGCGACTGACGGCCACCGTCTGGCGGTATGCGCGATGCCGGTAGGCCAACCGCTGCCAAACCATTCGGTGATTGTACCGCGTAAAGGCGTGCTGGAGCTGATGCGTATGCTCGATGGCGGCGACAGCCCGCTGCGCATTCAG +>14 +TCGGCGCTGACGGAAAACGATCTGGTCTTCGCCCTCTCGCAGCACGCCGTCACCTTTGCAGATGCCGAGCTTCAGCAACAAGGGAAAAGCTGGCCCTCCCTTCCGCGTTATTTTGCCATTGGTCGCACAACGGCGCTGGCGCTGCATACCGTTAGCGGTTTCAATATTCACTACCCTCTGGATCGGGAAATTAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGCGCGCTTATATTACGCGGCAATGGTGGCCGTGAGCTGATAGGTGAAACCCTGACAGCACGCGGAGCTGATGTCGATTTTTGTGAATGTTATCAACGCAGTGCAAAATATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCTCGTGGTGTGACCACGGTGGTTGTCACCAGCGGAGAGATGCTACAA +>15 +GCGGCGCTGGGGGAGAGCGATCTGTTGTTTGCCCTCTCGCAACACGCGGTTGCTTTTGCCCAATCACAGCTGCATCAGCAAGATCGTAAATGGCCCCGACTACCTACTTATTTCGCCATTGGACGCACCACCGCACTGGCGCTACATACCGTAAGCGGACAGAAGATTCTCTACCCGCAGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGTGCGCTGATATTACGTGGCAATGGCGGTCGTGAGCTAATTGGGGATACCCTGACGGCGCGCGGTGCTGAGGTCACTTTTTGTGAATGTTATCAACGATGCGCAATCCATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCCCGCGAGGTGACGACGGTCGTTGTTACCAGCGGTGAAATGTTGCAG +>16 
+GCGACGTTGACGGAAAACGATCTGGTTTTTGCCCTTTCACAGCACGCCGTCGCCTTTGCCCACGCCCAACTCCAGCGAGATGGTCGAAACTGGCCTGCGTCGCCGCGCTATTTCGCGATTGGTCGCACCACGGCGCTCGCCCTTCATACCGTTAGCGGGTTCGATATTCGTTATCCATTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGCGCGCTGATTTTGCGTGGCAATGGCGGTCGCGGTCGCGAACTGCTGGGCGAAACCCTGACAGCTCGCGGAGCCGAAGTCAGTTTTTGTGAATGTTATCAACGAAGTGCGAAACATTACGATGGCGCAGAAGAGGCGATGCGCTGGCACACTCGCGGCGTAACGACGCTTGTTGTCACCAGCGGCGAGATGTTGCAA +>17 +GCGGCGCTCACGGACAACGATCTGGTGTTCGCCCTCTCGCAACACGCCGTCGCCTTTGCCCACGCCCAACTGCAACAGCAGGAGCTGGACTGGCCTGTGCAACCACGCTACTTCGCCATCGGGCGCACAACGGCGCTGGCGCTGCATACCGTTAACGGATGCGATATTCGCTATCCTCTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGAGCGCTTATTTTACGGGGCAACGGCGGGCGTGAACTGTTAGGCAAAACCCTCACAGAACGCGGCGCTGAAGTCACCTTTTGTGAATGTTATCAACGCAGTGCAAAACATTACGATGGCGCGGAAGAGGCGATGCGCTGGCACTCTCGCGGCGTGACGACGATTGTTGTCACCAGCGGCGAAATGCTGCAA +>18 +GAAACACTTGGCGATAACGATCTGCTCTTTGCACTTTCTCAACATGCAGTGTCATTCGCCCATGCGCAGTTGCAACAGCAGGGGCTAAACTGGCCATCACTTCCGCATTATTTCGCTATTGGCCGTACTACCGCTCTCGCCCTGCACACCGTAAGCGGACATAAGATTCGCTATCCACAAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCGGAATTACAAAGTATTGCGGGAAAACGCGCACTTATTTTGCGCGGTAACGGCGGCCGTGAATTGATCGGTCAGACGCTGACATCACGTGGTGCCGACGTTACTTTTTGTGAATGTTATCAACGCAGTGCGAAGCATTACGATGGTGCGGAAGAAGCTATGCGCTGGCAGTCTCGCGGCGTAACAACCGTCGTTGTAACCAGCGGTGAAATGCTGCAA +>19 +CGTCTCTTGCAGGAAGGCGATCTGCTCTTTGCGCTGTCGCAGCATGCCGTGGAGTTTGCCCATGCGCAGCTGCAACAGCATGCCGTTAGCTGGCCTCACGCCCCCCGCTATTTCGCCATCGGGCGCACCACGGCGCTGGCCTTACATACCGCGAGCGGAATCGATGTTCGTTACCCGTTAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAACCATTGCCGGAAAGCGCGCGCTCATTTTGCGCGGCAACGGTGGCCGCGAACTGCTGGGCGAAACGCTGCGCGAACGCGGCGCAGACGTGACGTTTGTGGAGTGCTATCAGCGCTGTGCGAAACACTATGATGGCGCGGAAGAAGCAATGCGCTGGCACGCCCGCGGTATTAATACGCTGGTGGTCACCAGCGGTGAAATGTTACAA +>20 
+ATTGCGGGATGCCAGAAGGTGGTTCTGTGCTCGCCGCCACCCATCGCTGATGAAATCCTCTATGCGGCGCAACTGTGTGGCGTGCAGGAAATCTTTAACGTCGGCGGCGCGCAGGCGATTGCCGCTCTGGCCTTCGGCAGCGAGTCCGTACCGAAAGTGGATAAAATTTTTGGCCCCGGCAACGCCTTTGTAACCGAAGCCAAGCGTCAGGTCAGCCAGCGTCTCGACGGCGCGGCTATCGATATGCCAGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCAACACCGGATTTCGTCGCTTCTGACCTGCTCTCCCAGGCTGAGCACGGCCCGGATTCCCAGGTGATCCTGCTGACGCCGGATGCTGACATTGCCCGCAAGGTGGCGGAGGCGGTAGAACGTCAACTGGCGGAACTGCCGCGCGCGGGCACCGCCCGGCAGGCCCTGAGCGCCAGTCGTCTGATTGTGACCAAAGATTTAGCGCAGTGCGTC +>21 +ATTGCCGGATGCAAAAAAGTGGTGTTGTGCTCGCCACCGCCTATCGCGGATGAAATCCTTTACGCTGCGCAGCTGTGCGGCGTGCAGGAAATCTTCAACGTCGGCGGCGCCCAGGCCATTGCCGCTCTGGCGTTCGGCAGCGAATCCGTGCCAAAAGTGGACAAAATTTTTGGCCCCGGCAACGCGTTTGTCACCGAGGCGAAACGCCAGGTCAGCCAGCGTCTCGACGGCGCGGCAATTGATATGCCTGCCGGCCCTTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCCACGCCAGATTTCGTGGCGTCTGACCTGCTCTCTCAGGCGGAACACGGCCCGGATTCTCAGGTCATCCTGCTGACCCCGGATGCCGGTATTGCGCAGAACGTCGCAGAGGCCGTCGAACGCCAGTTAGCGGAGTTACCGCGTGCAGAAACGGCGCGTCAGGCATTAAGCGCCAGCCGTCTGATCGTGACGAAAGACTTAGCCCAGTGCGTC +>22 +ATTGCAGGCTGTAAAAAAGTGGTGTTGTGCTCTCCCCCACCTATCGCCGATGAAATTCTGTATGCTGCGCAGCTCTGCGGCGTACAGGATGTGTTTAACGTTGGGGGCGCACAAGCTATTGCCGCGCTGGCATTTGGCAGTGAATCCGTGCCGAAAGTGGACAAAATTTTTGGCCCCGGTAATGCCTTTGTGACCGAAGCCAAACGTCAGGTGAGTCAGCGTCTGGACGGCGCCGCCATCGATATGCCAGCAGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCCACGCCGGATTTCGTTGCCTCTGACTTACTCTCGCAGGCCGAACACGGCCCCGATTCCCAAGTGATCCTGCTGACGCCGGATGCCGGTATGGCCAGCCGGGTTGCTGAAGCAGTAGAACGCCAGCTTGCAGCGCTGCCACGCGCTGAAACCGCGCGGCAGGCGTTAAGCGCCAGTCGTCTGATTGTCACCCGCTCCCTTGCGCAATGCGTA +>23 
+ATTGCGGGCTGTAAAAAAGTGGTGCTGTGCTCACCGCCGCCGATTGCCGATGAGATCCTTTACGCGGCGCAGCTGTGCGGTGTGCAGGACGTGTTTAACGTCGGCGGCGCACAGGCCATTGCCGCGCTGGCGTTTGGTACAGAATCCGTGCCGAAAGTGGACAAAATCTTCGGGCCAGGTAACGCCTTTGTCACCGAGGCAAAACGTCAGGTGAGCCAGCGTCTGGACGGTGCGGCGATCGATATGCCCGCAGGCCCGTCGGAAGTGCTGGTGATTGCTGACAGCGGCGCAACGCCGGATTTCGTGGCTTCTGATTTGCTCTCCCAGGCTGAACACGGCCCGGACTCTCAGGTGATTTTACTGACGCCCGCTGCTGATATGGCGCGTCGCGTAGCCGAAGCTGTCGAACGCCAGCTGGCAGAACTGCCGCGAGCTGAAACCGCCCGCCAGGCACTGAACGCCAGCCGCCTGATCGTGACTAAAGATTTAGCGCAGTGCGTG +>24 +ATTGCCGGTTGTCAGAAGGTGGTGCTCTGCTCTCCTCCACCGATCGCCGATGAGATCCTGTACGCGGCGAAGCTGTGCGGCGTGCAGGCGATCTATAAAGTGGGCGGTGCGCAGGCGATTTCTGCCCTGGCGTTCGGAACAGTATCCATTCCTAAGGTCGACAAAATCTTTGGCCCGGGCAATGCCTACGTGACCGAGGCGAAGCGCCAGGTCAGCCAGCGTCTGGACGGCGCGGCGATTGATATGCCTGCCGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCTACACCGGATTTCGTGGCCTCTGACCTGCTCTCGCAGGCCGAGCACGGCCCTGACTCGCAGGTGATTTTACTGACGCCAGATGCCGACATGGCAAAACGCGTGGGCGACGCCGTTGAGCGTCAGCTGGCTGACCTGCCGCGGGCGGAAACGGCGCGTCAGGCGCTATCCGCCAGCCGCCTGATTGTGGCCCGCGATCTTGACCAGTGCATC +>25 +ATCGCCGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTACGCCGCGCAACTCTGTGGCGTGAAAGAAGTGTTTAACGTGGGTGGCGCACAGGCCATTGCCGCGCTGGCGCTGGGCACGGAGTCTATTCCAAAAGTCGATAAAATCTTTGGGCCGGGCAACGCCTATGTGACCGAAGCCAAGCGCCAGGTCAGCCAGCGTCTTGACGGCGCGGCAATCGATATGCCCGCCGGACCGTCCGAAGTATTGGTTATCGCCGACAGCGGCGCAACGCCGGATTTTGTCGCCTCCGACCTGCTTTCTCAGGCCGAGCACGGCCCAGACTCGCAGGTGATCCTGCTGACGCCGGACGCTAAGCTTGCCGAGGGCGTGGCCGAAGCCGTTGAACGCCAGCTCGCCGAGCTGTCCCGCGCCGACACCGCGCGTCAGGCGCTCTCCGCCAGCCGTTTAATCGTAGCGAAAGATCTGGCGCAGTGCGTG +>26 
+ATCGCGGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTATGCGGCGCGTTTGTGCGGGGTACAGCAGGTCTATCAGGTGGGCGGCGCTCAGGCCATCGCGGCGCTGGCGTTTGGCACCGAGACCGTACCCAAAGTGGACAAAATCTTCGGGCCGGGCAATGCGTTTGTCACCGAAGCCAAACGTCAGGTCAGCCAGCGGCTGGATGGCGCGGCGATTGATATGCCTGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGATAGCGGCGCGACCACGGATTTCGTGGCCTCGGATTTGCTGTCCCAGGCGGAACACGGCCCGGATTCGCAGGTGATCCTGCTGACACCGGACAGCGCCATGGCGCAGGCGGTGGCCGACGCGGTTGAGCGTCAACTCGCCGAACTGCCGCGCGCGGAAACAGCTCGCCAGGCGCTGGCGGAAAGCCGCCTGATTGTGGCGCGCGATTTAGCGCAGTGCGTG +>27 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAATTTTTGACATTCTGGATATTCCGCACCATGTCGAAGTGGTTTCTGCTCACCGTACCCCCGATAAACTGTTCAGCTTTGCCGAAAATGCTGAAGAAAACGGCTTTCAGGTAATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCAGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTTGGCGTTCCGGTACAAAGCGCTGCGCTAAGCGGTGTGGACAGTCTCTATTCTATTGTACAGATGCCGCGCGGTATTCCGGTTGGCACACTGGCCATCGGCAAAGCTGGCGCCGCTAACGCGGCGCTGCTGGCGGCGCAAATTCTGGCCACCCACGATAACGCACTGCATCAGCGCCTTCGCGAC +>28 +AGCGACTGGACTACCATGCAATTCGCCGCCGAAATTTTTGAAATTCTGGATGTTCCGCACCATGTAGAAGTGGTTTCCGCCCATCGAACCCCTGATAAACTGTTCAGCTTCGCCGAAACGGCGGAAGAGAACGGATATCACGTGATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACATTGGTGCCGGTACTCGGCGTTCCGGTACAAAGCGCAGCATTAAGCGGTGTGGATAGCCTTTACTCCATTGTTCAGATGCCGCGTGGCATTCCGGTGGGTACACTGGCTATCGGCAAAGCCGGGGCTGCGAACGCCGCGCTGCTGGCAGCGCAAATTTTGGCCACACACGATAATGCGCTGCACCAGCGCCTGAGCAAC +>29 +AGCGACTGGGCTACCATGCAGTTCGCCGCAGAAATCCTCGATATTCTGAACGTACCTCACCATGTTGAAGTGGTTTCCGCCCACCGCACGCCCGATAAACTGTTCAGCTTCGCCGAAGACGCCGAAAGCAACGGTTATCAGGTGATTATTGCCGGTGCCGGCGGCGCTGCGCACTTACCCGGAATGATTGCCGCCAAAACGCTGGTCCCGGTATTAGGTGTACCCGTCCAGAGCGCCGCATTAAGCGGTGTCGATAGCCTCTACTCCATCGTGCAGATGCCGCGCGGCATTCCGGTCGGTACGCTGGCGATCGGTAAAGCCGGTGCCGCTAACGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>30 
+AGCGACTGGGCTACCATGCAGTTCGCCGTCGAAATCTTCGAAATCCTGAATGTCCCGCACCACGTTGAAGTGGTTTCTGCTCACCGCACCCCCGATAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTATCAGGTGATTATTGCGGGCGCAGGCGGCGCAGCGCACCTGCCAGGCATGATTGCCGCCAAAACGCTGGTGCCGGTGCTGGGCGTGCCAGTACAGAGCGCCGCACTGAGCGGTGTCGATAGCCTCTACTCCATCGTACAAATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATTGGTAAAGCTGGCGCGGCAAACGCGGCATTACTGGCAGCACAAATTCTCGCGACTCACGATAAAGAGCTACACCAGCGTCTGAATGGC +>31 +AGCGACTGGGCTACCATGCAGTTTGCCGCCGAAATCTTCGATATCCTGAACGTTCCACACCACGTTGAAGTGGTTTCCGCACACCGCACCCCCGATAAGCTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAAGGGTTATCAGGTGATTATTGCCGGTGCTGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTGGGCGTGCCGGTGCAAAGCGCTGCGCTGAGCGGCGTGGACAGCCTCTACTCTATCGTCCAGATGCCGCGCGGCATTCCGGTCGGCACGCTGGCGATCGGCAAAGCGGGCGCGGCGAACGCGGCGTTACTGGCAGCGCAAATTCTGGCGACACACGATAAAGACCTGCGCCAACGTCTGGCGGAC +>32 +AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTCGAAATGCTGGACGTTCCGCACCATGTTGAAGTCGTCTCAGCCCACCGTACCCCTGATAAACTGTTCAGCTTCGCCGAAAGCGCTGAAGAAAACGGTTATCAGGTTATTATTGCGGGTGCTGGCGGTGCAGCGCATCTGCCGGGCATGATTGCAGCGAAAACGCTGGTCCCCGTGTTAGGCGTTCCGGTACAAAGCGCAGCGTTGAGCGGCGTAGATAGCCTCTACTCAATCGTGCAGATGCCACGCGGCATCCCCGTGGGTACGCTGGCGATTGGGAAAGCGGGTGCGGCAAATGCGGCCCTGCTGGCAGCACAAATTCTGGCAACACACGACAAAGCATTACATCAGCGTCTGAGCGAC +>33 +AGTGACTGGGCAACCATGTCTCATGCCGCAGATGTATTAGATACACTACAAATTCCTTACCATGTTGAGATTGTCTCTGCACACCGAACCCCTGATAAGTTATTTAGTTTTGCTGAAAAAGCAAAAAGTAATGGCTTTGATGTCATTATTGCTGGTGCAGGAGGAGCTGCCCATTTACCAGGAATGCTTGCAGCTAAAACGTTAGTACCCGTATTTGGTGTTCCTGTTCAAAGTGCGACATTAAGCGGTGTTGATAGCCTCTATTCAATCGTACAAATGCCAAAAGGTATCCCTGTAGGAACCTTAGCGATTGGTAAAGCAGGGGCTGCCAATGCGGCTTTATTAGCGGCTCAAGTTTTAGCGTTACATTCTCCTGCTATTTTAGATGCATTGACTGCA +>34 
+AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTTGAAATCCTGAATGTTCCGCACCACGTCGAAGTGGTTTCCGCACACCGTACCCCGGACAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTACGAGGTGATCATTGCCGGTGCGGGCGGCGCAGCACATCTGCCGGGCATGATTGCCGCCAAAACGCTGGTGCCGGTACTGGGTGTTCCCGTGCAAAGCGCCGCGTTAAGCGGGGTGGATAGCCTTTACTCTATTGTCCAGATGCCGCGCGGTATTCCTGTCGGTACCCTGGCGATTGGTAAAGCAGGTGCGGCAAATGCCGCCCTGCTGGCCGCGCAGATCCTGGCGACGCATGATAAAGATTTGCACCAGCGTCTGGCGGAG +>35 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAACGGCGGAAGAGAACGGATATCAAGTGATTATTGCCGGCGCGGGCGGCGCGGCGCACCTGCCGGGAATGATTGCGGCAAAAACGCTGGTCCCGGTACTCGGCGTGCCGGTACAAAGCGCTGCGCTAAGCGGCGTGGATAGCCTTTACTCCATTGTGCAGATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATCGGTAAAGCCGGTGCGGCTAATGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>36 +AGCGACTGGGCCACCATGCAGCATGCCGCTGAAATTCTTGATGCCCTTGATGTTCCTTACCATGTTGAAGTGGTTTCCGCTCACCGCACGCCTGATAAGCTTTTCAGCTTTGCTGAATCCGCGCAGCACAACGGTTATCAGGTGATTATTGCTGGCGCAGGCGGTGCGGCGCATCTGCCGGGCATGATCGCCGCGAAAACCCTGGTGCCGGTATTAGGCGTGCCGGTGCAAAGCGCGGCCCTGAGCGGCGTGGACAGCCTCTACTCTATCGTGCAAATGCCGCGCGGCATTCCGGTAGGGACGCTGGCGATCGGCAAAGCGGGTGCTGCAAACGCCGCACTGCTGGCGGCGCAGATCCTCGCCCAGCATGACGATGCGCTACTGGCGCGTCTGGCGGCA +>37 +AAACGCTTCCTGAACGAACTGACCGCCGCTGAAGGGCTGGAACGTTATCTGGGCGCCAAATTCCCGGGTGCGAAACGTTTCTCGCTCGAGGGGGGAGATGCGCTGATACCTATGCTGAAAGAGATGGTTCGCCATGCGGGTAACAGCGGCACTCGCGAAGTGGTGCTGGGGATGGCGCACCGCGGTCGTCTGAACGTGCTGATCAACGTACTGGGTAAAAAACCGCAGGATCTGTTCGACGAGTTTGCCGGTAAACATAAAGAACATCTGGGTACCGGCGACGTGAAGTATCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGTCTGGTTCACCTGGCGCTGGCGTTTAACCCATCGCATCTGGAAATTGTGAGCCCGGTGGTGATGGGCTCCGTGCGCGCCCGTCTGGACCGACTGGACGAACCGAGCAGTAATAAAGTGCTGCCGATCACTATTCACGGCGACGCCGCGGTGACCGGCCAGGGCGTGGTTCAG +>38 
+AAACGCTTCCTGAACGAACTGACCGCTGCAGAAGGGCTGGAACGTTATCTGGGGGCAAAATTCCCTGGCGCGAAACGTTTTTCGCTGGAAGGCGGCGATGCGTTAATTCCGATGCTCAAAGAGATGGTCCGCCATGCGGGCAACAGCGGCACCCGCGAAGTGGTGTTGGGAATGGCGCACCGTGGTCGCCTGAACGTACTGGTCAACGTGCTGGGTAAAAAACCTCAGGATCTGTTTGACGAGTTTGCCGGTAAACATAAAGAACATTTGGGCACCGGCGACGTGAAGTACCATATGGGTTTCTCGTCGGATATCGAAACCGAAGGCGGACTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTCAGCCCGGTAGTGATGGGGTCTGTGCGCGCACGTCTCGACCGGCTCGACGAACCGAGCAGCAACAAAGTGTTGCCAATCACCATTCATGGTGATGCAGCAGTTACCGGGCAGGGCGTGGTTCAG +>39 +AAACGCTTCTTAAGCGAACTGACCGCCGCTGAAGGCCTTGAACGTTACCTCGGCGCAAAATTCCCTGGCGCAAAACGCTTCTCGCTGGAAGGCGGTGACGCGTTAATCCCGATGCTTAAAGAGATGATCCGCCACGCTGGCAACAGCGGCACCCGCGAAGTGGTTCTCGGGATGGCGCACCGTGGTCGTCTGAACGTGCTGGTGAACGTGCTGGGTAAAAAACCGCAAGACTTGTTCGACGAGTTCGCCGGTAAACATAAAGAACACCTCGGCACGGGTGACGTGAAATACCACATGGGCTTCTCGTCTGACTTCCAGACCGATGGCGGCCTGGTGCACCTGGCGCTGGCGTTTAACCCGTCTCACCTTGAGATTGTAAGCCCGGTAGTTATCGGTTCTGTTCGTGCCCGTCTGGACAGACTTGATGAGCCGAGCAGCAACAAAGTGCTGCCAATCACCATCCACGGTGACGCCGCAGTGACCGGGCAGGGTGTGGTTCAG +>40 +AAACGCTTCCTCAGCGAACTGACTGCAGCGGAAGGTCTGGAACGCTACCTGGGCGCGAAATTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGTGATGCGTTAATCCCAATGCTCAAAGAGATGATCCGCCACGCCGGTAACAGCGGTACCCGTGAAGTGGTACTGGGTATGGCGCACCGTGGTCGTCTGAACGTCCTGGTTAACGTGCTGGGTAAAAAGCCGCAGGATCTATTCGACGAATTTGCGGGCAAACATAAAGAACACCTCGGTACCGGTGACGTGAAGTACCACATGGGCTTCTCATCGGATATCGAAACCGAAGGCGGTCTGGTGCATCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTTATCGGTTCCGTACGTGCACGCTTGGATCGTCTGGACGAGCCGAGCAGCAATAAAGTGCTGCCAATCACTATTCATGGTGATGCGGCAGTAACCGGGCAAGGCGTGGTTCAG +>41 
+CGTACTTTCCTTGAAGAGCTGACTGCCGCTGAAGGTTTAGAGCGCTATCTTGGTGCGAAATTCCCTGGTGCTAAACGTTTCTCTCTCGAAGGGGGGGATGCCTTAGTTCCGATGACCAAAGAGATGATCCGTCACGCGGGTGCCAGTGGCATGCGTGAAGTGGTGATTGGGATGGCGCACCGCGGTCGCTTGAACATGCTGGTCAACGTTCTGGGTAAAAAACCGCAAGATCTGTTTGATGAGTTTGCCGGTAAACATGGCGAAGGCTGGGGCACAGGTGATGTGAAATATCACCAAGGTTTCTCCGCTGACTTTGCGACACCGGGCGGTGATGTTCACTTAGCACTGGCTTTCAACCCATCGCATCTTGAGATTGTGAACCCTGTTGTGATGGGTTCAGTTCGCGCGCGTCAAGACCGCCTAGGTGATGAAGATGGCAGTAAAGTGCTACCTATCACTATCCATGGTGACTCTGCGATTGCCGGACAAGGTGTGGTGGCT +>42 +AAACGCTTCCTGAGCGAGCTGACCGCAGCCGAAGGCCTTGAGCGCTACCTGGGCGCGAAGTTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGCGACGCGCTGATCCCGATGCTGAAAGAGATGATTCGCCACGCGGGCAACAGCGGCACGCGTGAAGTGGTGCTGGGTATGGCGCACCGCGGTCGTCTTAACGTGCTGGTTAACGTGCTGGGTAAAAAACCGCAGGACCTGTTCGACGAGTTCGCGGGCAAACACAAAGAACACCTTGGCACCGGCGACGTGAAGTACCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGCCTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTAATTGGTTCGGTACGTGCCCGTCTGGATCGGCTGGACGAGCCGAGCAGCAACAAAGTACTGCCGATCACCATTCACGGCGACGCCGCGGTGACCGGTCAGGGCGTGGTTCAG +>43 +GTGCTGGGCCGTAATGGTTCCGACTATTCCGCCGCCGTGCTGGCCGCCTGTTTACGCGCTGACTGCTGTGAAATCTGGACTGACGTCGATGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGACTGCTGAAATCGATGTCCTACCAGGAAGCGATGGAACTCTCTTACTTCGGCGCCAAAGTCCTTCACCCTCGCACCATAACGCCTATCGCCCAGTTCCAGATCCCCTGTCTGATTAAAAATACCGGTAATCCGCAGGCGCCAGGAACGCTGATCGGCGCGTCCAGCGACGATGATAATCTGCCGGTTAAAGGGATCTCTAACCTTAACAACATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGGATGATTGGGATGGCGGCGCGTGTTTTCGCCGCCATGTCTCGCGCCGGGATCTCGGTGGTGCTCATTACCCAGTCCTCCTCTGAGTACAGCATCAGCTTCTGTGTGCCGCAGAGTGACTGC +>44 
+GTGCTGGGGCGTAACGGTTCCGACTATTCCGCTGCGGTACTGGCCGCCTGTTTACGCGCCGACTGTTGCGAAATCTGGACGGACGTTGACGGTGTGTATACCTGCGACCCGCGCCAGGTGCCGGATGCCAGACTGCTGAAGTCAATGTCCTATCAGGAAGCGATGGAACTTTCCTACTTCGGCGCCAAAGTGCTTCACCCGCGTACCATTACTCCCATCGCTCAATTCCAGATCCCATGTCTGATAAAAAATACCGGTAATCCGCAAGCGCCGGGCACGCTGATTGGCGCCAACAGCGATGAAGACGGGCTACCGGTAAAAGGCATCTCGAACCTCAATAATATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGCATGGTCGGGATGGCGGCGCGCGTGTTCGCCACCATGTCGCGTGCCGGGATTTCGGTAGTGCTGATCACCCAATCCTCTTCGGAGTACAGCATCAGCTTCTGCGTGCCGCCAAAGCGATGC +>45 +GTGCTGGGCCGTAACGGCTCCGATTATTCCGCCGCCGTACTGGCCGCCTGTTTACGCGCTGACTGTTGTGAAATCTGGACTGACGTCGACGGCGTGTATACCTGCGACCCGCGTCAGGTGCCAGACGCCAGGCTGCTGAAGTCGATGTCTTATCAGGAAGCAATGGAGCTTTCTTACTTCGGCGCTAAAGTACTACATCCGCGCACTATTACTCCTATTGCCCAGTTCCAGATCCCTTGTCTGATTAAAAATACCGGCAATCCACAAGCGCCCGGTACGCTGATCGGCGCTGCCAGCGACGATGATGCTCTGCCGGTTAAAGGGATTTCTCACCTTAACAACATGGCGATGTTTAGTGTCTCCGGTCCGGGGATGAAAGGCATGGTGGGTATGGCGGCGCGCGTTTTTGCCGCTATGTCACGTGCGGGAATCTCGGTGGTGTTGATCACGCAATCTTCATCTGAATACAGCATCAGCTTCTGCGTGCCGCAGAGCGACTGC +>46 +GTGCTGGGCCGCAACGGTTCTGATTACTCCGCTGCGGTGTTGGCTGCCTGCTTACGCGCCGACTGTTGTGAGATCTGGACTGACGTTGACGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGGTTGCTGAAGTCGATGTCCTATCAGGAGGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTCCTTCATCCTCGCACCATCACCCCCATTGCCCAGTTCCAAATCCCATGCCTGATTAAAAACACCGGAAACCCGCAGGCCCCTGGTACGCTGATCGGCGCCAGCGTGGATGAAGACGAACTGCCGGTGAAAGGGATCTCGAACCTGAACAATATGGCGATGTTCAGCGTTTCCGGCCCAGGAATGAAAGGGATGATCGGGATGGCGGCGCGCGTCTTCGCGGCAATGTCCCGCGCGGGGATCTCCGTGGTGCTGATCACGCAATCCTCTTCTGAATACAGCATCAGTTTCTGCGTACCGCAGGGCGACTGC +>47 
+GTGTTGGGGCGCAATGGCTCTGACTACTCTGCCGCTGTGCTGGCTGCCTGTTTACGCGCGGACTGTTGTGAGATCTGGACCGATGTCGACGGCGTATATACCTGCGATCCGCGCCAGGTACCCGATGCCCGACTGCTGAAGTCGATGTCTTATCAGGAAGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTTCTGCATCCGCGCACCATTACCCCAATTGCCCAGTTCCAGATCCCGTGCCTGATTAAAAATACCGGCAATCCACAAGCGCCTGGCACGTTGATCGGCGCCAGCAGTGATGAAGACGATTTGCCGGTAAAAGGTATTTCTAACCTCAATAACATGGCGATGTTTAGCGTCTCCGGCCCTGGAATGAAAGGCATGGTAGGCATGGCGGCGCGCGTTTTTGCCGCGATGTCGCGTGCGGGCATCTCGGTGGTGCTGATCACGCAGTCTTCTTCTGAATACAGCATCAGCTTCTGCGTTCCGCAGGGCGACTGC +>48 +GTATTAGGTCGCAATGGTTCAGACTACTCAGCTGCAGTATTAGCAGCCTGTTTACGTGCTAAATGCTGTGAAATTTGGACTGATGTTGACGGTGTTTATACTTGTGATCCACGTTTAGTGCCTGATGCACGTTTGTTAAAAGGCATGTCATATCAAGAGGCAATGGAACTGTCTTACTTTGGTGCCAAGGTACTTCATCCTCGTACAATTGCGCCTATTGCCCAATTCCAAATACCTTGTTTAATTAAAAATACGGGCAATCCAGATGCGCCGGGTACCTTGATTGGTGATGGTCAAAAAGATGAGAGCACACCTGTTAAAGGAATAACTAACCTTAATAATATGGCAATGATCAACGTATCTGGGCCTGGAATGAAAGGAATGGTAGGAATGGCGGCTCGCGTGTTCTCGGTAATGTCGAGAGCGGGGATTTCAGTTGTTCTAATCACACAGTCTTCTTCTGAATACAGCATTAGTTTTTGTGTGCCACAAAAAGAGCTG +>49 +GTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGCTGGCTGCCTGTTTACGCGCCGATTGTTGCGAGATTTGGACAGACGTTGACGGGGTCTATACCTGCGACCCGCGTCAGGTGCCCGATGCGAGGTTGTTGAAGTCGATGTCCTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCCCGCACCATTACCCCCATCGCCCAGTTCCAGATCCCTTGCCTGATTAAAAATACCGGAAATCCTCAAGCACCAGGTACGCTCATTGGTGCCAGCCGTGATGAAGACGAATTACCGGTCAAGGGCATTTCCAATCTGAATAACATGGCAATGTTCAGCGTTTCCGGCCCGGGGATGAAAGGAATGGTTGGCATGGCGGCGCGCGTCTTTGCAGCGATGTCACGCGCCCGTATTTCCGTGGTGCTGATTACGCAATCATCTTCCGAATACAGTATCAGTTTCTGCGTTCCACAAAGCGACTGT +>50 
+GTGCTCGGGCGCAACGGCTCCGATTATTCCGCAGCGGTACTGGCAGCGTGTTTACGCGCCGATTGTTGCGAGATCTGGACTGATGTCGATGGTGTCTATACCTGCGACCCACGTCAGGTACCGGATGCCCGATTACTTAAGTCGATGTCGTACCAGGAGGCTATGGAACTCTCCTATTTCGGCGCCAAAGTCCTCCATCCTCGAACCATCACTCCCATCGCCCAGTTCCAGATTCCCTGCCTGATAAAAAATACCGGAAACCCGCAAGCACCAGGAACGCTGATTGGCGCCAGCCGCGACGAAGATGATCTGCCGGTGAAGGGCATTTCAAATCTCAATAATATGGCGATGTTCAGCGTCTCCGGGCCGGGGATGAAGGGAATGGTCGGCATGGCTGCTCGCGTGTTTGCGGCAATGTCTCGCTCAGGAATTTCGGTAGTCCTGATTACGCAATCCTCCTCTGAGTACAGCATTAGCTTCTGTGTACCGCAGGCTGACTGT +>51 +GTGCTGGGGCGTAACGGCTCTGACTACTCCGCCGCCGTGCTGGCGGCCTGCTTACGCGCGGACTGCTGTGAGATCTGGACTGACGTCGACGGCGTTTATACCTGCGATCCGCGCCAGGTACCGGACGCCAGGCTGCTGAAGTCGATGTCGTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCGCGTACCATCTCCCCGATTGCCCAGTTCCAAATCCCTTGCCTGATTAAGAATACCGGTAACCCTCAGGCGCCGGGCACGCTGATTGGCGCCAGCGCGGATGAAGATGAACTGCCGGTGAAAGGCATTTCTAACCTCAATAACATGGCGATGTTCAGCGTCTCCGGCCCGGGGATGAAGGGCATGGTCGGCATGGCGGCACGCGTATTTGCCGCTATGTCCCGCAACGGGATCTCCGTGGTGCTGATCACGCAGTCTTCTTCCGAATACAGCATCAGCTTCTGCGTTCCGCAGGGTGATTGC +>52 +GTATTAGGCCGTAACGGTTCCGACTACTCCGCCGCCGTGCTGGCCGCGTGTTTGCGCGCCGACTGTTGTGAGATCTGGACTGACGTCGACGGCGTCTATACCTGCGACCCGCGCCAGGTGCCGGACGCCAGGCTGCTGAAGTCGATGTCGTATCAGGAAGCCATGGAACTCTCCTACTTCGGCGCTAAAGTTCTCCACCCCCGCACCATTGCCCCCATCGCCCAGTTCCAAATCCCCTGTCTGATCAAAAACACTGGTAACCCGCAAGCGCCAGGCACCCTGATCGGTGCCAGCAGCGATGAAGACGGCCTGCCGGTGAAGGGCATCAGTAACCTGAATAATATGGCGATGTTCAGCGTCTCTGGTCCGGGCATGAAAGGCATGGTGGGAATGGCGGCGCGCGTGTTCGCGGCGATGTCCCGTGCGGGCATCTCGGTGGTGCTGATCACCCAATCGTCTTCTGAATACAGCATCAGCTTCTGCGTGCCGCAGGCCGACAGC diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.ndb b/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.ndb new file mode 100644 index 0000000000000000000000000000000000000000..dfa7d2e267e27fdcac41d817c56044823912df6b GIT binary patch literal 20480 zcmeI%u}*_f6adgGnn+wQ#(ywwF7Ez{QB0atSk$?{)y4En{D3a@50nD0P>05(Iy5;9 zy!&{M%k5#hB_bVp8+{VEZl~uF`CS|BJbm@Y&P}7mw14ZFqjezx0t5&U 
zAV7cs0RjXF5FoI>z|eR9NB=)$AEK`2tS>!{C;j~AzWslp33bORK$56a26G33lxW~VPj%q;tg;sy%comRr(uEevFOhThS)()6H3s z6>seN|G!V$0yw3B1Wqd;f-?%p;H&~dIH!OV&MP2> z3kt~Lq5^`rq<|zYBhZn=6$K=5RRKv{Q$P~e6_CUY1tf7(0ZD8qAckiNjy+M5)T!S#HIq0c%*}tG zZ^sMi6f4nz(xlPXYgh@Z&oHb@XD#FU)&IfI*Y>wVT&>YxPr&nmV7~6!xPjbe;7A0gSS#c1h+7YcT!^%w=s_o zQs4wev51dSQy0duf=|-XQ@Dd?*pZH%#)SS9@{s-)!w2|D{!^f4PZ_)9Tf|rLTNJwk#BTooH4p9;?y@ZRlXt;Imt1zmfvc{$?uMIgx$TZacinU3 wz6Tz9Z*opT}eelsIpMCMwH{bp6(=WgMapr&Q3tcn?%>V!Z literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.nsq b/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.nsq new file mode 100644 index 0000000000000000000000000000000000000000..7fc5e3b8e6fafd8419a1bdda10966df4ce2396be GIT binary patch literal 6297 zcmZ9Mk3W-p|Hn6KrJ`R5o!jXgnzNjv&Ns@A(o}>_DwShXLu_kiQ(xyW9GXe`!NxHA zk)0e|rLs<^qAApUQm1oQ;>2k>gtg6b7dlbPee``_*Vg%-$Ndjn&pn^7_h&fU?yf3w z)y>@g_lZg&z0+@j-e8eXQ193s( zCt37>1RIlK%0=ZTQ{iTk#0hMf?Y1WEO+e(QDM92EsP-xy!p*!m9%CkAXH}HyS zDnQM^k(N%2$!BfaKI=wncT_Do{_gI*p>sGHGN5x(vKc%)LRK0R+F#`;{hW>R`#{;VsepQ=fwXjqR%F{|vn;5!`(c&qz}?-0p>s?bvI9KDzLYIY zjBWrrxM%LeO(^@}j`)3MXmc}(7TY2#76IH1`;o{kwn#>E-ZN}=i{q!fggrAM#JEh<0j{B0I+gw}{cx7nn>c1CW_g;XD z7TIpXEyII*Eji)l?l{;zL-q$#k;KYoFkyEkBB>J><5s3c3#Lj3*D5lgyn11GCX}z2 zG<;cg_rwF8hVG7_ba@}p#h5$LiF!sIxC4EN zQHaskxbkKxPXZl26=bl`gWYL2zVN7*D{meAS{kvGRCa(zV+vlsxu;jqY0H>L%qg3P zNJZ5uO|vOknyr0)DLVl^kLcd6dLpT>qqVSrZth^ZcYgrBIRF1%O?UIE04OnSn1nI zhcJ2wxc`klU3ePkJd|F6fIGdXGe&=BD+hH~Tg$TxE#+<1B}zl)D{08|WwQ8`NG1i{ zy{V5rizzVk!iKz$!tSX1J1CzKhQRJ=7WXl5KkQHJ!*;(9^yU*|*zVJXSC(Xy03CL( z0s6ML51#1lfd04L*@yhF-EE8D>iAzvLtN`*+t1TNuD^Z*?kd_WmO$$RyXThz9d+;O z4KY&<1zovVdAPd2=pmbXBT%}f{3D=$oMCZqL|U68qTeQT=Vmwu-KPv%iw!@9}5YY!4NxOSIe$xI?yRovV3!jW_SQsllP#(6je>&y{Z-=uku zb{ezgpnk(f)fP{WWQX$jw%hI6tyLw#C8iT!#@JOY5vMyzicZ`A4MyRy3dfo@APs~2 z-&Q*fAWY5RrGqFeL2keGJ*-QncHyL4i{uDKBsf_LYSwCw?eJtY#OgzK{v+(jaGZOE zk|A%`it9^4I(zfGD*^Wivt`r9MDiua 
z66B`&TI9yzoXKFKOk}oEutE;G^j(FH(=0UiAkXhJ-77THU2ki*)LA>;sFt;}{Gj9C z)o^=R?PF(x5Trlr7m8EI5~jTWQmnP(Ru_|ECPG$~^eT8R2_u&x*#cuRPO!;T=+c*N z{;+r2)2mw<5s589o@e&EmvxMnwrksKFSJ!^=^LTr3Ng!X-g}sH-p$$QXq(#2iW`Wu z|4mP)>wqs$6Mqfsn5Yp#N5xDpvP#mcNS+sy@&j}P825=pC#1*Au8&Am1$pYP zsVYKtQQJ|o5V`5wFR4dQJ{n1Jy7nFh7cxDNX2WD{H7kPD(zuLgaju3qru?HTDKCzn z=wxXGoBYAd6U@pL?Q>Y_OXz6xaDhqWQ1A0umD24ySR&g!_u0i+O}ud1gM=QooFtZ} z*G@?GcDD(dR?M9$XA@%o?QFN6>m3%>7>}^;73!DPar-sHMbZWA;^68D?dyEwH{tw} z+I{2UIy#?Gty-v0^hS17O$2~%UfF+F_W z{Mvo5!+)jocU4O-8bTK*9*c=%EGHgsQ%dj%m$Zf$8N+{k*yh8fC_Ow2qjzmjWr%G1 z?q^8S70KZNt%SFAC3fvnR?U>8zgsWB=p2XG7tVGRuJ`e<#wvumrBLr#kGhM&y^K*k zVNNf&ofaNFzczY&J?1XGSiAJBQMh>t!|SPB@2bKx*`#Zjdob%O!v-ZjFbSn+RXS4> z^dgVgTWoND5x%$;+)Kcng-l&og3;|KYP|`u;Ev;Z9|gLaT~w$~tmU2rx(dnz-Sk?F zmIwFraNU3S43=so!c{I6X2mhQYI5~56`sK$xf3F({Ko}Y`P@L-WT>|~z4Ev12@H`3 z{T5q@mQP5vdB%a=`@TXZazA}I{?CSa_EY6`4!e2IBq3J53So<({9o=TV0Tk`z7gC@ zYWZmx-9x&t2Cu9;95cxEI+d?iDA4jJx`94f%UnIMxin}M&EhUa-EH^Wq86t&@oZ+d z5}uhX?wKXu|2~duSYba^TSwRp<=12M0fcQ|sAp7h<1P;0L$8ieJ*E97-}oA(hXNh0 zuIhxB{+O~>7}L-2VjO)=mCk2!NtNA^F{6)(Hmi}%%G_1f@>cqNwsKN&F3dYM{29BF zv_%2sOZGaqQJUsK`8o%;tnL7Q+t>* z$eqDJZyxW(AgL5K;EpiSo6jY>pK^SIVbi$jg6zfUe8GdXKHrun8VHFhTHex}@r z$Zh+|b|w%um4;Pl3yAIoR)%e#2pB*6v{HA(?a~2bBATF>nOp74d(V(!j6e3&AJksAqmfo%@vF(+8onYfNKddqD6vk+k zM%z!Uq!Y&HqYUbNVA!D^zz|wIROlX-R%c+}X{DdL#Q z+t8{{wlTqGkvymodG7%XTh(FM@{qpk?~k5Dv+MinE46{!y% z&m&X3{jxq;bO%~3Q*ASR*G{aQ{U7Dc*Ywo?h_)7#TK9;ypMpKcOG4mwsq>R}RbiWB zd$_25%^Zayu>|!%H-}d0aUMm^995{{u700{R-Cn294B-SZv{nPZmgAi&}E4_n{_2& zjmU9~0q?_drasm)U1Uvv8O~I1c?vx2Of4q_S#+*ok2IbSHmQ`OJGHA%1`b9w>FT-- z4DL{Lri-)Z57#yNq7PoN4^!37Wpz!jaxHXKC`PZtjb}3PtrMBE*rJrV(MHOVVQNHD z5V3?R9Bw&jD&aNI3AgJBMxb{VjSN`mTb~63y=~>$*WBt=_&B7%LXXleI=QMd>POv~ zK0_k+5iJeq#gwahd&gJ9qOj%8H5U4}MAV&>Ll{ew13eM{U0UWW&fygMXyf%GI%-I! 
zh29J9Jk(u{(TfYfU5mO0_k^ey3))ckf$@(LkP0>C9@V>OFz}yIKmNh#o6vKILUa73 zK-U2MA?GmF$GPS#=DvqGqd>CHJCkrAQ!W@!Vp`~;6#H%94s_ZL;)NLDu=_a+{kQWd zeW|3VJwDG|htb>A;12XHn0upo3tAp?e;L*E2dDeiQ`XSqOc$U-dHWB*o&BkE4GHMU z2V;d^1ya8EOJZt{Mlhaf#J8rW%{n8BT^>CE^q7z&y-%r9I4s6?&zWYdC%lI8#YH1J zwEP!wr_O9t2MF#tdcUR)OvO7RS4KA7asTcC>^>OS|5c0bZ07`%JCpjqSNcVg>zcNE zA3Pi`$Ph~pc{iDE!0tq^d`UDZhY0S%_wMY&tfFYRI-NEI-5uyEaA!6=As8|D>5_c3 zd>ww?lHj-4?tfif#ou-Z_n(G)ZbA7T%$;&UU*u@Hx=ZNpnrAT%&GGG2hQ`Dgxy33_ z^{=iPX!wG6mp(u(Q`1~SGx}Er?2Z&BoJ{{V8`<_*wv*QJM#ZX~&7oac{83xyAG*UC zpSbGS%E)-piay%tveo-zr!7PRUUIw3Py5dj1y&>StA9#&G%{&LJ;Rl2c3%FCf5Ede z`-NWO;~u1WrX-+r!v&PiC}@dWT~*cM_Tnx*9;K6kekovgC(sY>2D(qSlhSdlYt`3l zL%Ra_*HQXJ-RbvaA4)gq z%j`rzk0DehfICKK6|`XPZr~nIy`+wG4bSLr2>7;BxZ(SQ;J*LU1Ag)k#(oP#-6v7^ z;)m-Q9B@~GI~jFf_qE0SdH3Zr{<9*QE&~0XH1!fM$-ZZ}#W!*Ful&z3x`c?jQwhzw z+f>6ZU*}9&SMD}9m@M@9Xoabozg|-kK&yLQQ^CWfnKJp7P`38 z`j@SaqfZ0(&Xn%*PoVrN)ID3V;W9I+sC!uDo9OohrT=MYlOXOvSD(opDhF<7urwz9 z$So#DPiSyUzDrM_mYs_tM_b*k^yL|Ta>ucqn7ih^yJ<1dB|4$Y6z#hAy8V0RSa%-+ z_a93!_k;#s(v}{S-iXn&4dW>N;xAf<=8tZNJ-v8m>Budkt*U=rm9E8Y4rs@aH5Yl(nmj8 zu|Ias${PO%$$hmzcla?6sZ=)i{M7JqlDKp7ci)W|{pGtC`gt~9xth1GaCNsc446AZ z+_HCF)kiIE^_JbE$j2tp@^JItqgS_odA1YPacmcM^Jdgt@Z0)eZ`WUk%cS7GhW4Ot z-L}3t3v00Qu)9MG>V8m?aETe!4&~P*`aX&N{8K1DK9A^rL(?P2u8w%YM1-3M`sylO z!xztizWvfU8aaAwv>_ll@>uH0^xc2Li~Gj!kz+>}N*~WaZ(b0!J{Vq{sZ1I#^zWnf z*R7-UqBq~k?XErJzs#Yb^i-v?Rk5Lw6+XA;U-UJJe&~yPnf;pcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.nto b/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/blast/nucleotide/nucleotide.nto new file mode 100644 index 
0000000000000000000000000000000000000000..ad19396e81aff427697a109c3c035ac73cb27f3f GIT binary patch literal 216 zcmXBFg${xM06;;oyHK$M6YTo`U-LHJc6+}dFSG(hN|dQkrAD0wO0 +KFRPGHADYTYHQKYGVRDYRGGGRSSARETAMRVAAGAIAKKYLQQEFGIEVRAYLSQMGDVAIDKVDWNEIENNDFFCPDVDKVAAFDELIRELKKEGDSIGAKIQVVATGVPVGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVRQKGSQHRDPLTPQG +>1 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKHGIVIQGCLTQMGDIPLEIKDWQQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVANGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>2 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGVEIRGCLTQMGDIPLEIKDWSQVELNPFFCPDPDKIEVLDELMRGLKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>3 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLKEKLGIEVRGYLSQLGPITCDLVDWSIVESNPFFCPDPSRLDALDEYMRALKKEGNSIGAKVTVVAQGVPAGFGEPVFDRLDADLAHALMSINAVKGIEIGDGFGVVTLKGTENRDEITKKG +>4 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGIEIRGCLTQMGDIPLEIKDWSQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKDG +>5 +VFRPGHADYTYEQKYGFRDYRGGGRSSARETAMRVAAGAIAKKYLQQKFGIVIRGCLSQMGDIPLAIKDWDQVELNPFFCADADKLDALDELMRGLKKEGDSIGAKVTVVADGVPAGWGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVKLRGSQNRDEITKAG +>6 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAQKFGVVIRGCLTQMGDIPLEIKDWDQVEQNPFFCPDPDKIEALDELMRALKKEGDSIGAKVTVVADSVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFGVVQLRGSQNRDEITTAG +>7 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>8 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>9 +MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>10 
+MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQPLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>11 +MEMVARVALVQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGERMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGQSLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>12 +MEMIARVTLTQPHDAGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGDSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>13 +MEMIARVALSLPHQAGATTVPARKFFDICRGLPEGAEIAVTLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCAMPVGQPLPNHSVIVPRKGVLELMRMLDGGDSPLRIQ +>14 +SALTENDLVFALSQHAVTFADAELQQQGKSWPSLPRYFAIGRTTALALHTVSGFNIHYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELIGETLTARGADVDFCECYQRSAKYYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>15 +AALGESDLLFALSQHAVAFAQSQLHQQDRKWPRLPTYFAIGRTTALALHTVSGQKILYPQDREISEVLLQLPELQNIAGKRALILRGNGGRELIGDTLTARGAEVTFCECYQRCAIHYDGAEEAMRWQSREVTTVVVTSGEMLQ +>16 +ATLTENDLVFALSQHAVAFAHAQLQRDGRNWPASPRYFAIGRTTALALHTVSGFDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRGRELLGETLTARGAEVSFCECYQRSAKHYDGAEEAMRWHTRGVTTLVVTSGEMLQ +>17 +AALTDNDLVFALSQHAVAFAHAQLQQQELDWPVQPRYFAIGRTTALALHTVNGCDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELLGKTLTERGAEVTFCECYQRSAKHYDGAEEAMRWHSRGVTTIVVTSGEMLQ +>18 +ETLGDNDLLFALSQHAVSFAHAQLQQQGLNWPSLPHYFAIGRTTALALHTVSGHKIRYPQDREISEVLLQLPELQSIAGKRALILRGNGGRELIGQTLTSRGADVTFCECYQRSAKHYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>19 +RLLQEGDLLFALSQHAVEFAHAQLQQHAVSWPHAPRYFAIGRTTALALHTASGIDVRYPLDREISEVLLQLPELQTIAGKRALILRGNGGRELLGETLRERGADVTFVECYQRCAKHYDGAEEAMRWHARGINTLVVTSGEMLQ +>20 +IAGCQKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADIARKVAEAVERQLAELPRAGTARQALSASRLIVTKDLAQCV +>21 +IAGCKKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGIAQNVAEAVERQLAELPRAETARQALSASRLIVTKDLAQCV +>22 
+IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGMASRVAEAVERQLAALPRAETARQALSASRLIVTRSLAQCV +>23 +IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGTESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPAADMARRVAEAVERQLAELPRAETARQALNASRLIVTKDLAQCV +>24 +IAGCQKVVLCSPPPIADEILYAAKLCGVQAIYKVGGAQAISALAFGTVSIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADMAKRVGDAVERQLADLPRAETARQALSASRLIVARDLDQCI +>25 +IAGCKKVVLCSPPPIADEILYAAQLCGVKEVFNVGGAQAIAALALGTESIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAKLAEGVAEAVERQLAELSRADTARQALSASRLIVAKDLAQCV +>26 +IAGCKKVVLCSPPPIADEILYAARLCGVQQVYQVGGAQAIAALAFGTETVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATTDFVASDLLSQAEHGPDSQVILLTPDSAMAQAVADAVERQLAELPRAETARQALAESRLIVARDLAQCV +>27 +SDWATMQFAAEIFDILDIPHHVEVVSAHRTPDKLFSFAENAEENGFQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLRD +>28 +SDWTTMQFAAEIFEILDVPHHVEVVSAHRTPDKLFSFAETAEENGYHVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLSN +>29 +SDWATMQFAAEILDILNVPHHVEVVSAHRTPDKLFSFAEDAESNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>30 +SDWATMQFAVEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKELHQRLNG +>31 +SDWATMQFAAEIFDILNVPHHVEVVSAHRTPDKLFSFAESAEEKGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLRQRLAD +>32 +SDWATMQFAAEIFEMLDVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKALHQRLSD +>33 +SDWATMSHAADVLDTLQIPYHVEIVSAHRTPDKLFSFAEKAKSNGFDVIIAGAGGAAHLPGMLAAKTLVPVFGVPVQSATLSGVDSLYSIVQMPKGIPVGTLAIGKAGAANAALLAAQVLALHSPAILDALTA +>34 +SDWATMQFAAEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYEVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLHQRLAE +>35 
+SDWATMQFAAETAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>36 +SDWATMQHAAEILDALDVPYHVEVVSAHRTPDKLFSFAESAQHNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDDALLARLAA +>37 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLINVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>38 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>39 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDFQTDGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>40 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>41 +RTFLEELTAAEGLERYLGAKFPGAKRFSLEGGDALVPMTKEMIRHAGASGMREVVIGMAHRGRLNMLVNVLGKKPQDLFDEFAGKHGEGWGTGDVKYHQGFSADFATPGGDVHLALAFNPSHLEIVNPVVMGSVRARQDRLGDEDGSKVLPITIHGDSAIAGQGVVA +>42 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>43 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDDDNLPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>44 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGANSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFATMSRAGISVVLITQSSSEYSISFCVPPKRC +>45 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGAASDDDALPVKGISHLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>46 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASVDEDELPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>47 
+VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>48 +VLGRNGSDYSAAVLAACLRAKCCEIWTDVDGVYTCDPRLVPDARLLKGMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPDAPGTLIGDGQKDESTPVKGITNLNNMAMINVSGPGMKGMVGMAARVFSVMSRAGISVVLITQSSSEYSISFCVPQKEL +>49 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDC +>50 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRSGISVVLITQSSSEYSISFCVPQADC +>51 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTISPIAQFQIPCLIKNTGNPQAPGTLIGASADEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRNGISVVLITQSSSEYSISFCVPQGDC +>52 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQADS diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/config.json b/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/config.json new file mode 100644 index 0000000..073adf5 --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/config.json @@ -0,0 +1,12 @@ +{ + "db_name": "Locidex Database 3", + "db_version": "1.0.0", + "db_date": "04/04/2024", + "db_author": "test1", + "db_desc": "test1", + "db_num_seqs": 53, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" +} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/meta.json b/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/meta.json new file mode 100644 index 0000000..f3b88fa --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/meta.json @@ -0,0 +1,1181 @@ +{ + "info": { + 
"num_seqs": 53, + "is_cds": "True", + "trans_table": 11, + "dna_min_len": 220, + "dna_max_len": 350, + "dna_min_ident": 80, + "aa_min_len": 73, + "aa_max_len": 116, + "aa_min_ident": 80 + }, + "meta": { + "0": { + "seq_id": 0, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 609, + "dna_seq_len": 501, + "dna_seq_hash": "4811bc98591c74954ace3cb487330482", + "aa_seq_len": 167, + "aa_seq_hash": "a8fbcf8179d8548f980b7b15f29de1d4", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "1": { + "seq_id": 1, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 614, + "dna_seq_len": 501, + "dna_seq_hash": "b66979eaf680fab872ffe1bde4c092d6", + "aa_seq_len": 167, + "aa_seq_hash": "3e034a4d80ac27352822774abd9319df", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "2": { + "seq_id": 2, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 618, + "dna_seq_len": 501, + "dna_seq_hash": "f02a36ff6df05f9bf38428fa22a035da", + "aa_seq_len": 167, + "aa_seq_hash": "e2d30bb18231528ef65c34880704dd7a", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "3": { + "seq_id": 3, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + 
"locus_description": NaN, + "locus_uid": 619, + "dna_seq_len": 501, + "dna_seq_hash": "bee9d7360aa8e9b840fb29afa1de2c2e", + "aa_seq_len": 167, + "aa_seq_hash": "c3f71f5780b5f1031aaf21697a482ee3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "4": { + "seq_id": 4, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 620, + "dna_seq_len": 501, + "dna_seq_hash": "5b7956485455fdbc7c86d4834a8f7406", + "aa_seq_len": 167, + "aa_seq_hash": "60ce8f3b07f53378580ee528910ee623", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "5": { + "seq_id": 5, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 624, + "dna_seq_len": 501, + "dna_seq_hash": "98ba14aac74444a253123aff3d20c69f", + "aa_seq_len": 167, + "aa_seq_hash": "bab41702c7c209def93f9c9930c27086", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "6": { + "seq_id": 6, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 716, + "dna_seq_len": 501, + "dna_seq_hash": "6b9166d5d996897cae3cc288d7969d78", + "aa_seq_len": 167, + "aa_seq_hash": "5bc86c0a9226224922cbd6219c182622", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + 
"min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "7": { + "seq_id": 7, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "d401763f2df6e5fe87e1e07d3c170fe6", + "aa_seq_len": 167, + "aa_seq_hash": "928ad814483bbffda3e3b3a0aa4ca072", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "8": { + "seq_id": 8, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 120, + "dna_seq_len": 501, + "dna_seq_hash": "9c50d73cc4ef8d0a447f07ad150ad8cc", + "aa_seq_len": 167, + "aa_seq_hash": "928ad814483bbffda3e3b3a0aa4ca072", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "9": { + "seq_id": 9, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 555, + "dna_seq_len": 501, + "dna_seq_hash": "fab4f658dfba0cd0174a4a87998cf948", + "aa_seq_len": 167, + "aa_seq_hash": "a081905e659429db1f40e145932ae277", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "10": { + "seq_id": 10, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 557, + 
"dna_seq_len": 501, + "dna_seq_hash": "acb2ed027124e2a54b7734cd538590f1", + "aa_seq_len": 167, + "aa_seq_hash": "970184ec5ccc9f02ee3c858d2687cc18", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "11": { + "seq_id": 11, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 558, + "dna_seq_len": 501, + "dna_seq_hash": "ad996a122298d55ab3d4b2ea7a4974b0", + "aa_seq_len": 167, + "aa_seq_hash": "945455021fffea9b793d16af630db961", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "12": { + "seq_id": 12, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 563, + "dna_seq_len": 501, + "dna_seq_hash": "815242e67f31f4e2968f7f0620565125", + "aa_seq_len": 167, + "aa_seq_hash": "1b117ca76a022ae63d6f7bfe2ead289e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "13": { + "seq_id": 13, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 633, + "dna_seq_len": 501, + "dna_seq_hash": "532742ae95c046241789d79e68e30b7a", + "aa_seq_len": 167, + "aa_seq_hash": "fff51d2396f3da88a775416b4c6d14b6", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + 
"min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "14": { + "seq_id": 14, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 316, + "dna_seq_len": 432, + "dna_seq_hash": "3922f6256f2891400db415013eb0b208", + "aa_seq_len": 144, + "aa_seq_hash": "0af9d546dfcaf93373a8919df3e30323", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "15": { + "seq_id": 15, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 343, + "dna_seq_len": 432, + "dna_seq_hash": "f76c13e33ad5b502dfe64181dbdf2378", + "aa_seq_len": 144, + "aa_seq_hash": "32484f065f9013aaa5b3c694cc99cdbf", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "16": { + "seq_id": 16, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 472, + "dna_seq_len": 438, + "dna_seq_hash": "80bea3abd165ee14e51bc9e9779fc6a1", + "aa_seq_len": 146, + "aa_seq_hash": "4e9cc2d289f1c946738cc8e6e4ef1186", + "dna_min_len": 306, + "dna_max_len": 744, + "aa_min_len": 102, + "aa_max_len": 248, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "17": { + "seq_id": 17, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 489, + 
"dna_seq_len": 432, + "dna_seq_hash": "83a314185d9ff0bf7c2953d30979e7eb", + "aa_seq_len": 144, + "aa_seq_hash": "5f9fc3707789543f2f14b0f1a555a05c", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "18": { + "seq_id": 18, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 497, + "dna_seq_len": 432, + "dna_seq_hash": "c70622b317de74bdaf57eb8bb5134537", + "aa_seq_len": 144, + "aa_seq_hash": "56b3d46d3e517eb7f83f089f9ed5ae2a", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "19": { + "seq_id": 19, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 498, + "dna_seq_len": 432, + "dna_seq_hash": "f284b11b34de688e2ef54c1b73936595", + "aa_seq_len": 144, + "aa_seq_hash": "da558cdebd900031d0df8f58ef01454e", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "20": { + "seq_id": 20, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "9f762c246c542c52c94c5022ca62311c", + "aa_seq_len": 167, + "aa_seq_hash": "447381a0d286fa1037b5499e2242819a", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + 
"min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "21": { + "seq_id": 21, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 754, + "dna_seq_len": 501, + "dna_seq_hash": "65b434bea0d1939d2b748dbc5dd6df8b", + "aa_seq_len": 167, + "aa_seq_hash": "2b685aa7892794b69c9faa20c58a9183", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "22": { + "seq_id": 22, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 757, + "dna_seq_len": 501, + "dna_seq_hash": "eccfc35078428e44e5dd3e85d9ebf1fe", + "aa_seq_len": 167, + "aa_seq_hash": "35fa89ee4cd8689b89d553157471afe0", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "23": { + "seq_id": 23, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 759, + "dna_seq_len": 501, + "dna_seq_hash": "ce01d780cd0ffe3197f708d7048a473b", + "aa_seq_len": 167, + "aa_seq_hash": "bc0edd26ea6032cc4939e8cbc17a12d3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "24": { + "seq_id": 24, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 768, + "dna_seq_len": 501, + 
"dna_seq_hash": "23377e95fe00bf6a16b51fe8929a938a", + "aa_seq_len": 167, + "aa_seq_hash": "9fb34628ef67396ed38c755280e04f7e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "25": { + "seq_id": 25, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 838, + "dna_seq_len": 501, + "dna_seq_hash": "8478cdd016753651cd73afc4ad20c7df", + "aa_seq_len": 167, + "aa_seq_hash": "6512669779521a6792ecdae3088467f7", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "26": { + "seq_id": 26, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 907, + "dna_seq_len": 501, + "dna_seq_hash": "ab935d39fffeff601d95a8362ba454f3", + "aa_seq_len": 167, + "aa_seq_hash": "1c277aef51e883e29ee8b489c525ea1b", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "27": { + "seq_id": 27, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 24, + "dna_seq_len": 399, + "dna_seq_hash": "a7af783dc7084f1b8bc593aa29f80003", + "aa_seq_len": 133, + "aa_seq_hash": "46a0c532edb92303b1b9d12a80056a60", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + 
"min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "28": { + "seq_id": 28, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 48, + "dna_seq_len": 399, + "dna_seq_hash": "9fb313e6232b0d0e14d2fc4be7c409f7", + "aa_seq_len": 133, + "aa_seq_hash": "0e56efdd1f7fbaf132524616e29d98ca", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "29": { + "seq_id": 29, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 317, + "dna_seq_len": 399, + "dna_seq_hash": "50cd750e2f6860dd489040f1d5f64f9b", + "aa_seq_len": 133, + "aa_seq_hash": "18e887a66ce56a930dbf8db48b406596", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "30": { + "seq_id": 30, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 487, + "dna_seq_len": 399, + "dna_seq_hash": "0e1384e36f3897f65690f9230d2bcd73", + "aa_seq_len": 133, + "aa_seq_hash": "20c9a488aa6542257a151ced866d2f8f", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "31": { + "seq_id": 31, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": 
"phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 608, + "dna_seq_len": 399, + "dna_seq_hash": "e180fd1852382c132851674a9e379c03", + "aa_seq_len": 133, + "aa_seq_hash": "c7da76b50946241fe125348a19a9b6a3", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "32": { + "seq_id": 32, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 611, + "dna_seq_len": 399, + "dna_seq_hash": "0ec842f985e93041c928ab7bb137295d", + "aa_seq_len": 133, + "aa_seq_hash": "be3990f2abaa8780b14e62d4fc8cd82a", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "33": { + "seq_id": 33, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 612, + "dna_seq_len": 399, + "dna_seq_hash": "9d42e484ea2936f87312f07abf0ad84a", + "aa_seq_len": 133, + "aa_seq_hash": "7af624e3930c7a5ab7785b08d925081c", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "34": { + "seq_id": 34, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 619, + "dna_seq_len": 399, + "dna_seq_hash": "02949c6f858f3cc5de1b13c9f5a40705", + "aa_seq_len": 133, + 
"aa_seq_hash": "52d120d4090a22e450633e01e4ccb729", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "35": { + "seq_id": 35, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 631, + "dna_seq_len": 315, + "dna_seq_hash": "c4715d7df9a9eebfe5a334dd55ee469b", + "aa_seq_len": 105, + "aa_seq_hash": "31aa38918b303bf67374188e11413e59", + "dna_min_len": 220, + "dna_max_len": 535, + "aa_min_len": 73, + "aa_max_len": 178, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "36": { + "seq_id": 36, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 724, + "dna_seq_len": 399, + "dna_seq_hash": "782d08e7ee8a031a1402020e708bfbbc", + "aa_seq_len": 133, + "aa_seq_hash": "b5f9063808b8be839e7f169bf73c88e4", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "37": { + "seq_id": 37, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "481b6454f33fae7875b4978c14094ec3", + "aa_seq_len": 167, + "aa_seq_hash": "fa04457773c66ae015014e915af2516d", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + 
"min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "38": { + "seq_id": 38, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 30, + "dna_seq_len": 501, + "dna_seq_hash": "79048d21794195277a6af839be13e6e1", + "aa_seq_len": 167, + "aa_seq_hash": "186c53cb5c2bf0b7ecac853c6067065d", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "39": { + "seq_id": 39, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 281, + "dna_seq_len": 501, + "dna_seq_hash": "f10d273aa97d5556a43b96721d666975", + "aa_seq_len": 167, + "aa_seq_hash": "4172d5e8c8265884fe5479e10527cb02", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "40": { + "seq_id": 40, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 399, + "dna_seq_len": 501, + "dna_seq_hash": "1839775cc7c29412648ec7b004e1a417", + "aa_seq_len": 167, + "aa_seq_hash": "c4cfbbf5c5814829188f4f404f312bd3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "41": { + "seq_id": 41, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + 
"locus_uid": 571, + "dna_seq_len": 501, + "dna_seq_hash": "fce3e68952108e415579b3ad24a3f150", + "aa_seq_len": 167, + "aa_seq_hash": "43372b6526524f5ed4542be83b5b8614", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "42": { + "seq_id": 42, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 686, + "dna_seq_len": 501, + "dna_seq_hash": "629ea0cbfe0d2e9f34b1ca034a6c55fd", + "aa_seq_len": 167, + "aa_seq_hash": "c4cfbbf5c5814829188f4f404f312bd3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "43": { + "seq_id": 43, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "eaec644b411bd0b3ab1e086fbabd29c9", + "aa_seq_len": 167, + "aa_seq_hash": "bfe756f2f421db752907a171f3a44d69", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "44": { + "seq_id": 44, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 31, + "dna_seq_len": 501, + "dna_seq_hash": "97e4acce4e840b1c48de51f55fccf620", + "aa_seq_len": 167, + "aa_seq_hash": "be9296cb1ea9443fb43c0f967d107988", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 
116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "45": { + "seq_id": 45, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 208, + "dna_seq_len": 501, + "dna_seq_hash": "fbc6cb34cddfb1fe6a7806d5f7613259", + "aa_seq_len": 167, + "aa_seq_hash": "b788ec581475c9ba71d997b2db6e1def", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "46": { + "seq_id": 46, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 630, + "dna_seq_len": 501, + "dna_seq_hash": "ce58c0cacd4e8d9fa4867d11f2add864", + "aa_seq_len": 167, + "aa_seq_hash": "c062c5c88bdebdf2883e06fe6823c71c", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "47": { + "seq_id": 47, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 631, + "dna_seq_len": 501, + "dna_seq_hash": "949426df5430f94547459d06c786d77b", + "aa_seq_len": 167, + "aa_seq_hash": "dac50e2b5df83fe87c9826ecf99d568e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "48": { + "seq_id": 48, + 
"locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 632, + "dna_seq_len": 501, + "dna_seq_hash": "9a187a6b3e4675fe12ea213c7a23577c", + "aa_seq_len": 167, + "aa_seq_hash": "6536824faaa7880cfb44a6cd1ed057c9", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "49": { + "seq_id": 49, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 633, + "dna_seq_len": 501, + "dna_seq_hash": "7be8b9732228c1f82630b547d7011a5e", + "aa_seq_len": 167, + "aa_seq_hash": "1eac2cb94b8f619df1c9b0f3369f4a96", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "50": { + "seq_id": 50, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 637, + "dna_seq_len": 501, + "dna_seq_hash": "1895acdf991b49a885873fe82ce9ca85", + "aa_seq_len": 167, + "aa_seq_hash": "9fe9521d0bf495570a0fd425c0e48764", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "51": { + "seq_id": 51, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 638, + "dna_seq_len": 501, + 
"dna_seq_hash": "9776bbec78b5214d3dfca0d32b395d4b", + "aa_seq_len": 167, + "aa_seq_hash": "2914d167cc3579348e36d16afc628a39", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "52": { + "seq_id": 52, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 748, + "dna_seq_len": 501, + "dna_seq_hash": "6cf9d69644c819d9ecd3a0fd090977fc", + "aa_seq_len": 167, + "aa_seq_hash": "cf0168a601a4f5792c7326a2da650edb", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + } + } +} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/results.json b/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/results.json new file mode 100644 index 0000000..15087ba --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_multiple/pass_three_db/results.json @@ -0,0 +1,14 @@ +{ + "analysis_start_time": "2024-04-04 14:12:12", + "parameters": { + "input_file": "locidex/example/build_db_mlst_in/senterica.mlst.txt", + "outdir": "/tmp/pytest-of-mwells/pytest-82/build0", + "name": "Locidex Database 2", + "db_ver": "1.0.0", + "db_desc": "", + "author": "", + "date": "", + "force": true + }, + "analysis_end_time": "2024-04-04 14:12:12" +} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.fasta b/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.fasta new file mode 100644 index 0000000..a03cb89 --- /dev/null +++ 
b/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.fasta @@ -0,0 +1,106 @@ +>0 +AAATTCCGTCCCGGACATGCGGACTACACCTATCACCAAAAATACGGTGTGCGAGATTACCGTGGCGGCGGCCGTTCATCGGCACGTGAAACCGCCATGCGTGTTGCTGCGGGAGCGATTGCCAAAAAATATCTGCAGCAAGAGTTTGGCATTGAAGTGCGTGCTTACTTGTCGCAAATGGGGGATGTCGCGATTGATAAAGTGGATTGGAATGAGATTGAAAACAACGATTTCTTCTGTCCTGATGTCGATAAAGTGGCTGCGTTTGACGAGCTGATCCGCGAGCTGAAAAAAGAAGGCGATTCGATCGGCGCGAAAATCCAAGTGGTCGCTACAGGCGTGCCGGTTGGACTGGGTGAGCCTGTGTTTGATCGCTTAGATGCGGATATTGCCCATGCCTTGATGAGCATCAACGCCGTGAAAGGAGTCGAGATTGGTGATGGCTTTGATGTGGTGCGCCAAAAAGGCAGCCAACACCGTGACCCGCTCACTCCACAAGGT +>1 +GTTTTCCGCCCGGGCCATGCCGACTATACCTACGAGCAGAAATACGGTCTGCGCGATTACCGTGGCGGCGGTCGTTCTTCCGCCCGTGAAACGGCGATGCGCGTCGCGGCTGGCGCGATTGCTAAAAAATATCTGGCGGAGAAACACGGCATCGTCATTCAGGGGTGTCTGACCCAGATGGGCGATATTCCGCTTGAAATCAAAGACTGGCAGCAGGTTGAACAAAACCCGTTTTTCTGTCCTGATCCAGATAAAATCGACGCGCTGGATGAACTGATGCGCGCCCTGAAGAAAGAGGGCGATTCGATTGGGGCAAAAGTGACCGTCGTGGCAAACGGCGTTCCGGCCGGGCTTGGCGAACCGGTCTTTGACCGTCTGGATGCGGACATCGCTCATGCGCTGATGAGCATCAACGCGGTAAAAGGCGTGGAGATTGGCGATGGGTTTGATGTGGTCGCGTTGCGAGGCAGCCAGAATCGCGATGAAATTACCAAAGAGGGC +>2 +GTTTTCCGTCCAGGACACGCTGACTATACCTATGAGCAGAAATATGGCCTGCGCGACTACCGTGGCGGCGGACGTTCATCCGCGCGTGAAACGGCGATGCGCGTTGCGGCTGGCGCGATTGCCAAAAAATATCTGGCGGAAAAATTCGGCGTTGAAATTCGCGGCTGTCTGACGCAGATGGGGGATATTCCGCTGGAGATCAAAGACTGGTCTCAGGTGGAGCTTAACCCGTTCTTTTGTCCAGACCCGGATAAAATCGAAGTGCTGGACGAACTGATGCGCGGGCTGAAGAAAGAGGGCGACTCCATCGGGGCAAAAGTGACCGTTGTTGCAAGCGGCGTACCGGCGGGTCTCGGCGAACCTGTATTCGACCGTCTGGATGCCGACATCGCCCATGCGCTGATGAGCATTAACGCCGTTAAGGGCGTTGAGATTGGCGACGGTTTTGACGTTGTTGCGCTGCGCGGCAGTCAGAACCGCGATGAGATCACCAAAGAAGGT +>3 
+GTTTTCCGCCCAGGGCATGCTGATTATACCTATGAACAAAAATATGGTTTGCGTGATTATCGTGGTGGTGGACGTTCTTCTGCTCGTGAAACGGCAATGCGTGTCGCCGCAGGTGCGATTGCTAAAAAATATCTAAAAGAGAAATTAGGCATCGAAGTTCGAGGATATCTTTCTCAGCTAGGACCTATTACATGTGATCTTGTTGATTGGTCTATTGTTGAAAGCAATCCATTTTTCTGTCCTGATCCTTCACGTTTAGATGCGCTTGATGAATACATGCGTGCACTTAAAAAAGAAGGTAATTCTATTGGTGCAAAAGTCACTGTGGTTGCACAGGGTGTACCTGCTGGATTTGGTGAACCTGTCTTTGATCGATTAGATGCTGATTTAGCGCATGCTTTGATGAGTATCAATGCTGTCAAAGGTATAGAAATTGGTGATGGATTTGGTGTTGTAACATTAAAAGGTACAGAAAACCGAGATGAAATCACTAAAAAGGGA +>4 +GTTTTCCGTCCAGGCCATGCCGATTACACCTACGAACAAAAATACGGTCTGCGCGATTATCGCGGCGGCGGGCGCTCTTCCGCCCGCGAAACCGCCATGCGCGTGGCGGCAGGGGCGATTGCAAAAAAATATCTCGCCGAGAAATTTGGCATTGAGATTCGCGGCTGCCTGACCCAGATGGGTGACATTCCGCTGGAAATCAAAGACTGGTCGCAGGTCGAGCAAAATCCGTTTTTCTGCCCGGACCCGGACAAAATCGACGCGTTAGATGAACTGATGCGCGCGCTGAAAAAAGAGGGCGACTCCATCGGCGCGAAAGTCACCGTTGTTGCCAGTGGCGTCCCCGCCGGACTTGGCGAGCCGGTCTTTGACCGCCTGGATGCCGACATCGCCCATGCGCTGATGAGCATCAACGCGGTGAAAGGCGTAGAAATTGGTGATGGTTTTGACGTGGTGGCGCTGCGTGGCAGCCAGAACCGCGACGAAATCACCAAAGACGGT +>5 +GTTTTCCGTCCTGGTCACGCCGACTATACCTACGAACAAAAATATGGCTTTCGCGACTATCGCGGCGGCGGGCGTTCTTCCGCGCGTGAAACCGCGATGCGCGTGGCGGCAGGGGCAATTGCCAAAAAATATCTCCAGCAGAAATTCGGCATCGTTATCCGCGGCTGTCTGTCCCAGATGGGCGACATTCCGCTGGCAATCAAAGACTGGGATCAGGTAGAGCTCAACCCGTTCTTCTGCGCCGATGCCGACAAGCTGGACGCGCTGGATGAGCTGATGCGTGGCCTGAAAAAAGAGGGCGACTCCATTGGTGCGAAAGTCACCGTGGTGGCCGACGGCGTGCCGGCTGGCTGGGGCGAGCCGGTATTTGACCGCCTTGACGCCGACATCGCCCACGCGCTGATGAGCATCAACGCGGTGAAAGGCGTCGAAATCGGCGACGGTTTTGACGTGGTCAAGCTTCGCGGCAGCCAGAACCGCGACGAAATCACGAAGGCGGGT +>6 
+GTGTTCCGTCCGGGGCACGCGGATTACACCTACGAACAAAAATACGGCCTGCGCGACTATCGCGGCGGCGGGCGTTCATCCGCCCGTGAAACCGCCATGCGCGTCGCGGCAGGCGCTATCGCCAAAAAATATCTGGCGCAGAAATTCGGCGTGGTGATTCGCGGCTGCCTGACCCAGATGGGTGATATTCCGCTGGAAATCAAAGACTGGGATCAGGTAGAGCAAAACCCGTTCTTCTGCCCGGACCCGGATAAAATCGAGGCGCTGGATGAGCTGATGCGCGCTCTGAAAAAAGAGGGCGATTCCATCGGCGCGAAAGTCACCGTGGTGGCCGACAGCGTGCCCGCCGGGCTTGGCGAGCCGGTATTTGACCGCCTGGACGCCGATATCGCCCACGCGCTGATGAGCATTAACGCCGTGAAGGGCGTGGAAATCGGCGACGGTTTCGGCGTGGTGCAACTGCGCGGCAGCCAGAACCGCGACGAAATCACCACTGCCGGT +>7 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAGCCAGGCGCCACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGCGGCCTGCCGGAGGGCGCGGAGATTGCCGTTCAGTTGGAAGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGCCGCTTCTCGCTGTCTACGCTGCCTGCCGCCGATTTCCCGAATCTTGACGACTGGCAAAGCGAAGTTGAATTTACGCTGCCGCAGGCCACGATGAAGCGCCTGATTGAAGCGACCCAGTTTTCGATGGCCCATCAGGATGTGCGCTACTACTTAAACGGTATGCTGTTTGAAACGGAAGGTAGCGAACTGCGCACTGTTGCGACCGACGGCCACCGTCTGGCGGTGTGCTCAATGCCGCTGGAGGCGTCTTTACCTAGCCACTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGTGGCGAAAACCCGCTGCGCGTGCAG +>8 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAACCCGGCGCTACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCCTGCCGGAAGGGGCGGAAATCGCCGTTCAGCTGGAGGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGTCGCTTTTCGCTGTCTACCTTACCGGCAGCAGACTTCCCGAATCTGGATGACTGGCAAAGCGAAGTGGAATTCACGCTGCCTCAGGCGACGATGAAACGCTTGATTGAGGCCACCCAGTTTTCGATGGCCCATCAGGACGTGCGCTACTACCTGAACGGTATGTTGTTTGAAACGGAAGGAAGCGAACTGCGCACCGTCGCGACCGACGGCCACCGTCTGGCGGTCTGTTCAATGCCGCTGGAGGCCTCTTTACCGAGCCATTCAGTGATCGTACCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTTGACGGCGGTGAAAATCCACTGCGTGTACAG +>9 
+ATGGAAATGGTGGCGCGCGTTGCGTTGATTCAGCCTCATGAACCAGGCGCAACTACCGTCCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCTTGCCGGAAGGGGCTGAAATTGCCGTCCAGCTGGAAGGCGATCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTCTCGCTTTCCACGCTGCCTGCCGCCGATTTCCCTAATCTGGATGACTGGCAGAGCGAAGTCGAATTCACCCTGCCGCAGGCAACGATGAAGCGCCTGATTGAAGCCACCCAGTTCTCAATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAGACTGAAGGTGAAGAGTTGCGTACCGTCGCGACCGACGGTCACCGTCTGGCGGTCTGCTCTATGCCGGTCGGGCAATCTCTGCCTAACCATTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAGCTGATGCGTATGCTCGACGGCGGCGAAACCCCGCTGCGCGTACAG +>10 +ATGGAGATGGTGGCGCGCGTGGCGCTGATCCAGCCTCATGAACCTGGTGCGACCACCGTTCCGGCGCGTAAATTCTTCGATATTTGCCGTGGATTACCAGAAGGGGCGGAAATTGCCGTTCAACTGGAAGGCGACCGTATGCTGGTGCGTTCTGGCCGCAGCCGTTTCTCGCTGTCTACGCTGCCTGCCGCCGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTCGAATTCACCCTGCCACAGGCGACAATGAAGCGCCTGATTGAAGCCACGCAGTTTTCGATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAAACCGAAGGGGAAGAGTTGCGTACCGTGGCGACCGACGGTCACCGCCTGGCGGTCTGTTCAATGCCTGTCGGTCAGCCGTTGCCTAGCCATTCGGTGATCGTACCGCGTAAAGGTGTGATTGAACTGATGCGTATGCTCGACGGCGGCGATAACCCGCTGCGCGTGCAG +>11 +ATGGAAATGGTGGCACGCGTTGCGCTGGTTCAGCCGCACGAACCAGGGGCGACGACCGTTCCAGCGCGCAAATTCTTTGATATCTGCCGTGGTCTGCCTGAAGGCGCGGAAATTGCCGTGCAGCTGGAAGGTGAGCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTTTCGCTGTCTACCCTGCCAGCGGCGGATTTCCCGAATCTCGATGACTGGCAGAGCGAAGTCGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCGACCCAGTTTTCTATGGCGCATCAGGACGTTCGCTATTACTTAAACGGTATGCTGTTTGAAACCGAAGGTGAAGAACTGCGCACCGTGGCGACCGACGGCCACCGTCTGGCAGTCTGTTCAATGCCAATTGGTCAATCTTTGCCAAGCCATTCGGTGATCGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGCGGCGACAATCCGCTGCGCGTGCAG +>12 
+ATGGAAATGATCGCGCGCGTTACGCTGACTCAGCCGCACGACGCGGGCGCGACCACGGTTCCGGCACGTAAATTCTTTGATATTTGCCGTGGGCTGCCGGAAGGCGCTGAAATCGCAGTGCAGCTGGAGGGCGACCGCATGCTGGTGCGCTCTGGCCGCAGCCGTTTCTCCCTCTCCACGTTGCCCGCTGCGGACTTCCCGAACCTGGATGACTGGCAGAGCGAAGTTGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCCACGCAGTTCTCCATGGCGCATCAGGACGTTCGTTACTACTTAAACGGCATGCTGTTCGAAACCGAAGGTGAAGAGCTGCGTACCGTGGCGACCGACGGTCACCGTCTGGCGGTTTGTTCCATGCCGATTGGCGATTCACTGCCAAACCATTCGGTGATCGTACCGCGTAAAGGCGTAATTGAACTGATGCGTATGCTCGACGGCGGTGAAACGCCGCTGCGCGTGCAG +>13 +ATGGAGATGATCGCGCGTGTGGCGCTGTCGCTACCGCACCAGGCGGGCGCGACCACCGTGCCGGCGCGCAAATTCTTCGATATCTGCCGTGGCTTGCCGGAAGGGGCGGAAATCGCCGTTACGCTGGAAGGCGACAGAATGCTGGTGCGCTCCGGGCGCAGCCGCTTCTCGCTGTCTACGTTACCGGCGGCAGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTGGAGTTCACGCTCCCGCAGGCCACCATGAAGCGCCTGATCGAAGCGACCCAGTTCTCCATGGCCCATCAGGACGTGCGGTATTACCTGAACGGGATGCTGTTTGAAACCGAAGGCGAAGAGCTGCGCACCGTGGCGACTGACGGCCACCGTCTGGCGGTATGCGCGATGCCGGTAGGCCAACCGCTGCCAAACCATTCGGTGATTGTACCGCGTAAAGGCGTGCTGGAGCTGATGCGTATGCTCGATGGCGGCGACAGCCCGCTGCGCATTCAG +>14 +TCGGCGCTGACGGAAAACGATCTGGTCTTCGCCCTCTCGCAGCACGCCGTCACCTTTGCAGATGCCGAGCTTCAGCAACAAGGGAAAAGCTGGCCCTCCCTTCCGCGTTATTTTGCCATTGGTCGCACAACGGCGCTGGCGCTGCATACCGTTAGCGGTTTCAATATTCACTACCCTCTGGATCGGGAAATTAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGCGCGCTTATATTACGCGGCAATGGTGGCCGTGAGCTGATAGGTGAAACCCTGACAGCACGCGGAGCTGATGTCGATTTTTGTGAATGTTATCAACGCAGTGCAAAATATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCTCGTGGTGTGACCACGGTGGTTGTCACCAGCGGAGAGATGCTACAA +>15 +GCGGCGCTGGGGGAGAGCGATCTGTTGTTTGCCCTCTCGCAACACGCGGTTGCTTTTGCCCAATCACAGCTGCATCAGCAAGATCGTAAATGGCCCCGACTACCTACTTATTTCGCCATTGGACGCACCACCGCACTGGCGCTACATACCGTAAGCGGACAGAAGATTCTCTACCCGCAGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGTGCGCTGATATTACGTGGCAATGGCGGTCGTGAGCTAATTGGGGATACCCTGACGGCGCGCGGTGCTGAGGTCACTTTTTGTGAATGTTATCAACGATGCGCAATCCATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCCCGCGAGGTGACGACGGTCGTTGTTACCAGCGGTGAAATGTTGCAG +>16 
+GCGACGTTGACGGAAAACGATCTGGTTTTTGCCCTTTCACAGCACGCCGTCGCCTTTGCCCACGCCCAACTCCAGCGAGATGGTCGAAACTGGCCTGCGTCGCCGCGCTATTTCGCGATTGGTCGCACCACGGCGCTCGCCCTTCATACCGTTAGCGGGTTCGATATTCGTTATCCATTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGCGCGCTGATTTTGCGTGGCAATGGCGGTCGCGGTCGCGAACTGCTGGGCGAAACCCTGACAGCTCGCGGAGCCGAAGTCAGTTTTTGTGAATGTTATCAACGAAGTGCGAAACATTACGATGGCGCAGAAGAGGCGATGCGCTGGCACACTCGCGGCGTAACGACGCTTGTTGTCACCAGCGGCGAGATGTTGCAA +>17 +GCGGCGCTCACGGACAACGATCTGGTGTTCGCCCTCTCGCAACACGCCGTCGCCTTTGCCCACGCCCAACTGCAACAGCAGGAGCTGGACTGGCCTGTGCAACCACGCTACTTCGCCATCGGGCGCACAACGGCGCTGGCGCTGCATACCGTTAACGGATGCGATATTCGCTATCCTCTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGAGCGCTTATTTTACGGGGCAACGGCGGGCGTGAACTGTTAGGCAAAACCCTCACAGAACGCGGCGCTGAAGTCACCTTTTGTGAATGTTATCAACGCAGTGCAAAACATTACGATGGCGCGGAAGAGGCGATGCGCTGGCACTCTCGCGGCGTGACGACGATTGTTGTCACCAGCGGCGAAATGCTGCAA +>18 +GAAACACTTGGCGATAACGATCTGCTCTTTGCACTTTCTCAACATGCAGTGTCATTCGCCCATGCGCAGTTGCAACAGCAGGGGCTAAACTGGCCATCACTTCCGCATTATTTCGCTATTGGCCGTACTACCGCTCTCGCCCTGCACACCGTAAGCGGACATAAGATTCGCTATCCACAAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCGGAATTACAAAGTATTGCGGGAAAACGCGCACTTATTTTGCGCGGTAACGGCGGCCGTGAATTGATCGGTCAGACGCTGACATCACGTGGTGCCGACGTTACTTTTTGTGAATGTTATCAACGCAGTGCGAAGCATTACGATGGTGCGGAAGAAGCTATGCGCTGGCAGTCTCGCGGCGTAACAACCGTCGTTGTAACCAGCGGTGAAATGCTGCAA +>19 +CGTCTCTTGCAGGAAGGCGATCTGCTCTTTGCGCTGTCGCAGCATGCCGTGGAGTTTGCCCATGCGCAGCTGCAACAGCATGCCGTTAGCTGGCCTCACGCCCCCCGCTATTTCGCCATCGGGCGCACCACGGCGCTGGCCTTACATACCGCGAGCGGAATCGATGTTCGTTACCCGTTAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAACCATTGCCGGAAAGCGCGCGCTCATTTTGCGCGGCAACGGTGGCCGCGAACTGCTGGGCGAAACGCTGCGCGAACGCGGCGCAGACGTGACGTTTGTGGAGTGCTATCAGCGCTGTGCGAAACACTATGATGGCGCGGAAGAAGCAATGCGCTGGCACGCCCGCGGTATTAATACGCTGGTGGTCACCAGCGGTGAAATGTTACAA +>20 
+ATTGCGGGATGCCAGAAGGTGGTTCTGTGCTCGCCGCCACCCATCGCTGATGAAATCCTCTATGCGGCGCAACTGTGTGGCGTGCAGGAAATCTTTAACGTCGGCGGCGCGCAGGCGATTGCCGCTCTGGCCTTCGGCAGCGAGTCCGTACCGAAAGTGGATAAAATTTTTGGCCCCGGCAACGCCTTTGTAACCGAAGCCAAGCGTCAGGTCAGCCAGCGTCTCGACGGCGCGGCTATCGATATGCCAGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCAACACCGGATTTCGTCGCTTCTGACCTGCTCTCCCAGGCTGAGCACGGCCCGGATTCCCAGGTGATCCTGCTGACGCCGGATGCTGACATTGCCCGCAAGGTGGCGGAGGCGGTAGAACGTCAACTGGCGGAACTGCCGCGCGCGGGCACCGCCCGGCAGGCCCTGAGCGCCAGTCGTCTGATTGTGACCAAAGATTTAGCGCAGTGCGTC +>21 +ATTGCCGGATGCAAAAAAGTGGTGTTGTGCTCGCCACCGCCTATCGCGGATGAAATCCTTTACGCTGCGCAGCTGTGCGGCGTGCAGGAAATCTTCAACGTCGGCGGCGCCCAGGCCATTGCCGCTCTGGCGTTCGGCAGCGAATCCGTGCCAAAAGTGGACAAAATTTTTGGCCCCGGCAACGCGTTTGTCACCGAGGCGAAACGCCAGGTCAGCCAGCGTCTCGACGGCGCGGCAATTGATATGCCTGCCGGCCCTTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCCACGCCAGATTTCGTGGCGTCTGACCTGCTCTCTCAGGCGGAACACGGCCCGGATTCTCAGGTCATCCTGCTGACCCCGGATGCCGGTATTGCGCAGAACGTCGCAGAGGCCGTCGAACGCCAGTTAGCGGAGTTACCGCGTGCAGAAACGGCGCGTCAGGCATTAAGCGCCAGCCGTCTGATCGTGACGAAAGACTTAGCCCAGTGCGTC +>22 +ATTGCAGGCTGTAAAAAAGTGGTGTTGTGCTCTCCCCCACCTATCGCCGATGAAATTCTGTATGCTGCGCAGCTCTGCGGCGTACAGGATGTGTTTAACGTTGGGGGCGCACAAGCTATTGCCGCGCTGGCATTTGGCAGTGAATCCGTGCCGAAAGTGGACAAAATTTTTGGCCCCGGTAATGCCTTTGTGACCGAAGCCAAACGTCAGGTGAGTCAGCGTCTGGACGGCGCCGCCATCGATATGCCAGCAGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCCACGCCGGATTTCGTTGCCTCTGACTTACTCTCGCAGGCCGAACACGGCCCCGATTCCCAAGTGATCCTGCTGACGCCGGATGCCGGTATGGCCAGCCGGGTTGCTGAAGCAGTAGAACGCCAGCTTGCAGCGCTGCCACGCGCTGAAACCGCGCGGCAGGCGTTAAGCGCCAGTCGTCTGATTGTCACCCGCTCCCTTGCGCAATGCGTA +>23 
+ATTGCGGGCTGTAAAAAAGTGGTGCTGTGCTCACCGCCGCCGATTGCCGATGAGATCCTTTACGCGGCGCAGCTGTGCGGTGTGCAGGACGTGTTTAACGTCGGCGGCGCACAGGCCATTGCCGCGCTGGCGTTTGGTACAGAATCCGTGCCGAAAGTGGACAAAATCTTCGGGCCAGGTAACGCCTTTGTCACCGAGGCAAAACGTCAGGTGAGCCAGCGTCTGGACGGTGCGGCGATCGATATGCCCGCAGGCCCGTCGGAAGTGCTGGTGATTGCTGACAGCGGCGCAACGCCGGATTTCGTGGCTTCTGATTTGCTCTCCCAGGCTGAACACGGCCCGGACTCTCAGGTGATTTTACTGACGCCCGCTGCTGATATGGCGCGTCGCGTAGCCGAAGCTGTCGAACGCCAGCTGGCAGAACTGCCGCGAGCTGAAACCGCCCGCCAGGCACTGAACGCCAGCCGCCTGATCGTGACTAAAGATTTAGCGCAGTGCGTG +>24 +ATTGCCGGTTGTCAGAAGGTGGTGCTCTGCTCTCCTCCACCGATCGCCGATGAGATCCTGTACGCGGCGAAGCTGTGCGGCGTGCAGGCGATCTATAAAGTGGGCGGTGCGCAGGCGATTTCTGCCCTGGCGTTCGGAACAGTATCCATTCCTAAGGTCGACAAAATCTTTGGCCCGGGCAATGCCTACGTGACCGAGGCGAAGCGCCAGGTCAGCCAGCGTCTGGACGGCGCGGCGATTGATATGCCTGCCGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCTACACCGGATTTCGTGGCCTCTGACCTGCTCTCGCAGGCCGAGCACGGCCCTGACTCGCAGGTGATTTTACTGACGCCAGATGCCGACATGGCAAAACGCGTGGGCGACGCCGTTGAGCGTCAGCTGGCTGACCTGCCGCGGGCGGAAACGGCGCGTCAGGCGCTATCCGCCAGCCGCCTGATTGTGGCCCGCGATCTTGACCAGTGCATC +>25 +ATCGCCGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTACGCCGCGCAACTCTGTGGCGTGAAAGAAGTGTTTAACGTGGGTGGCGCACAGGCCATTGCCGCGCTGGCGCTGGGCACGGAGTCTATTCCAAAAGTCGATAAAATCTTTGGGCCGGGCAACGCCTATGTGACCGAAGCCAAGCGCCAGGTCAGCCAGCGTCTTGACGGCGCGGCAATCGATATGCCCGCCGGACCGTCCGAAGTATTGGTTATCGCCGACAGCGGCGCAACGCCGGATTTTGTCGCCTCCGACCTGCTTTCTCAGGCCGAGCACGGCCCAGACTCGCAGGTGATCCTGCTGACGCCGGACGCTAAGCTTGCCGAGGGCGTGGCCGAAGCCGTTGAACGCCAGCTCGCCGAGCTGTCCCGCGCCGACACCGCGCGTCAGGCGCTCTCCGCCAGCCGTTTAATCGTAGCGAAAGATCTGGCGCAGTGCGTG +>26 
+ATCGCGGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTATGCGGCGCGTTTGTGCGGGGTACAGCAGGTCTATCAGGTGGGCGGCGCTCAGGCCATCGCGGCGCTGGCGTTTGGCACCGAGACCGTACCCAAAGTGGACAAAATCTTCGGGCCGGGCAATGCGTTTGTCACCGAAGCCAAACGTCAGGTCAGCCAGCGGCTGGATGGCGCGGCGATTGATATGCCTGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGATAGCGGCGCGACCACGGATTTCGTGGCCTCGGATTTGCTGTCCCAGGCGGAACACGGCCCGGATTCGCAGGTGATCCTGCTGACACCGGACAGCGCCATGGCGCAGGCGGTGGCCGACGCGGTTGAGCGTCAACTCGCCGAACTGCCGCGCGCGGAAACAGCTCGCCAGGCGCTGGCGGAAAGCCGCCTGATTGTGGCGCGCGATTTAGCGCAGTGCGTG +>27 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAATTTTTGACATTCTGGATATTCCGCACCATGTCGAAGTGGTTTCTGCTCACCGTACCCCCGATAAACTGTTCAGCTTTGCCGAAAATGCTGAAGAAAACGGCTTTCAGGTAATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCAGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTTGGCGTTCCGGTACAAAGCGCTGCGCTAAGCGGTGTGGACAGTCTCTATTCTATTGTACAGATGCCGCGCGGTATTCCGGTTGGCACACTGGCCATCGGCAAAGCTGGCGCCGCTAACGCGGCGCTGCTGGCGGCGCAAATTCTGGCCACCCACGATAACGCACTGCATCAGCGCCTTCGCGAC +>28 +AGCGACTGGACTACCATGCAATTCGCCGCCGAAATTTTTGAAATTCTGGATGTTCCGCACCATGTAGAAGTGGTTTCCGCCCATCGAACCCCTGATAAACTGTTCAGCTTCGCCGAAACGGCGGAAGAGAACGGATATCACGTGATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACATTGGTGCCGGTACTCGGCGTTCCGGTACAAAGCGCAGCATTAAGCGGTGTGGATAGCCTTTACTCCATTGTTCAGATGCCGCGTGGCATTCCGGTGGGTACACTGGCTATCGGCAAAGCCGGGGCTGCGAACGCCGCGCTGCTGGCAGCGCAAATTTTGGCCACACACGATAATGCGCTGCACCAGCGCCTGAGCAAC +>29 +AGCGACTGGGCTACCATGCAGTTCGCCGCAGAAATCCTCGATATTCTGAACGTACCTCACCATGTTGAAGTGGTTTCCGCCCACCGCACGCCCGATAAACTGTTCAGCTTCGCCGAAGACGCCGAAAGCAACGGTTATCAGGTGATTATTGCCGGTGCCGGCGGCGCTGCGCACTTACCCGGAATGATTGCCGCCAAAACGCTGGTCCCGGTATTAGGTGTACCCGTCCAGAGCGCCGCATTAAGCGGTGTCGATAGCCTCTACTCCATCGTGCAGATGCCGCGCGGCATTCCGGTCGGTACGCTGGCGATCGGTAAAGCCGGTGCCGCTAACGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>30 
+AGCGACTGGGCTACCATGCAGTTCGCCGTCGAAATCTTCGAAATCCTGAATGTCCCGCACCACGTTGAAGTGGTTTCTGCTCACCGCACCCCCGATAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTATCAGGTGATTATTGCGGGCGCAGGCGGCGCAGCGCACCTGCCAGGCATGATTGCCGCCAAAACGCTGGTGCCGGTGCTGGGCGTGCCAGTACAGAGCGCCGCACTGAGCGGTGTCGATAGCCTCTACTCCATCGTACAAATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATTGGTAAAGCTGGCGCGGCAAACGCGGCATTACTGGCAGCACAAATTCTCGCGACTCACGATAAAGAGCTACACCAGCGTCTGAATGGC +>31 +AGCGACTGGGCTACCATGCAGTTTGCCGCCGAAATCTTCGATATCCTGAACGTTCCACACCACGTTGAAGTGGTTTCCGCACACCGCACCCCCGATAAGCTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAAGGGTTATCAGGTGATTATTGCCGGTGCTGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTGGGCGTGCCGGTGCAAAGCGCTGCGCTGAGCGGCGTGGACAGCCTCTACTCTATCGTCCAGATGCCGCGCGGCATTCCGGTCGGCACGCTGGCGATCGGCAAAGCGGGCGCGGCGAACGCGGCGTTACTGGCAGCGCAAATTCTGGCGACACACGATAAAGACCTGCGCCAACGTCTGGCGGAC +>32 +AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTCGAAATGCTGGACGTTCCGCACCATGTTGAAGTCGTCTCAGCCCACCGTACCCCTGATAAACTGTTCAGCTTCGCCGAAAGCGCTGAAGAAAACGGTTATCAGGTTATTATTGCGGGTGCTGGCGGTGCAGCGCATCTGCCGGGCATGATTGCAGCGAAAACGCTGGTCCCCGTGTTAGGCGTTCCGGTACAAAGCGCAGCGTTGAGCGGCGTAGATAGCCTCTACTCAATCGTGCAGATGCCACGCGGCATCCCCGTGGGTACGCTGGCGATTGGGAAAGCGGGTGCGGCAAATGCGGCCCTGCTGGCAGCACAAATTCTGGCAACACACGACAAAGCATTACATCAGCGTCTGAGCGAC +>33 +AGTGACTGGGCAACCATGTCTCATGCCGCAGATGTATTAGATACACTACAAATTCCTTACCATGTTGAGATTGTCTCTGCACACCGAACCCCTGATAAGTTATTTAGTTTTGCTGAAAAAGCAAAAAGTAATGGCTTTGATGTCATTATTGCTGGTGCAGGAGGAGCTGCCCATTTACCAGGAATGCTTGCAGCTAAAACGTTAGTACCCGTATTTGGTGTTCCTGTTCAAAGTGCGACATTAAGCGGTGTTGATAGCCTCTATTCAATCGTACAAATGCCAAAAGGTATCCCTGTAGGAACCTTAGCGATTGGTAAAGCAGGGGCTGCCAATGCGGCTTTATTAGCGGCTCAAGTTTTAGCGTTACATTCTCCTGCTATTTTAGATGCATTGACTGCA +>34 
+AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTTGAAATCCTGAATGTTCCGCACCACGTCGAAGTGGTTTCCGCACACCGTACCCCGGACAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTACGAGGTGATCATTGCCGGTGCGGGCGGCGCAGCACATCTGCCGGGCATGATTGCCGCCAAAACGCTGGTGCCGGTACTGGGTGTTCCCGTGCAAAGCGCCGCGTTAAGCGGGGTGGATAGCCTTTACTCTATTGTCCAGATGCCGCGCGGTATTCCTGTCGGTACCCTGGCGATTGGTAAAGCAGGTGCGGCAAATGCCGCCCTGCTGGCCGCGCAGATCCTGGCGACGCATGATAAAGATTTGCACCAGCGTCTGGCGGAG +>35 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAACGGCGGAAGAGAACGGATATCAAGTGATTATTGCCGGCGCGGGCGGCGCGGCGCACCTGCCGGGAATGATTGCGGCAAAAACGCTGGTCCCGGTACTCGGCGTGCCGGTACAAAGCGCTGCGCTAAGCGGCGTGGATAGCCTTTACTCCATTGTGCAGATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATCGGTAAAGCCGGTGCGGCTAATGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>36 +AGCGACTGGGCCACCATGCAGCATGCCGCTGAAATTCTTGATGCCCTTGATGTTCCTTACCATGTTGAAGTGGTTTCCGCTCACCGCACGCCTGATAAGCTTTTCAGCTTTGCTGAATCCGCGCAGCACAACGGTTATCAGGTGATTATTGCTGGCGCAGGCGGTGCGGCGCATCTGCCGGGCATGATCGCCGCGAAAACCCTGGTGCCGGTATTAGGCGTGCCGGTGCAAAGCGCGGCCCTGAGCGGCGTGGACAGCCTCTACTCTATCGTGCAAATGCCGCGCGGCATTCCGGTAGGGACGCTGGCGATCGGCAAAGCGGGTGCTGCAAACGCCGCACTGCTGGCGGCGCAGATCCTCGCCCAGCATGACGATGCGCTACTGGCGCGTCTGGCGGCA +>37 +AAACGCTTCCTGAACGAACTGACCGCCGCTGAAGGGCTGGAACGTTATCTGGGCGCCAAATTCCCGGGTGCGAAACGTTTCTCGCTCGAGGGGGGAGATGCGCTGATACCTATGCTGAAAGAGATGGTTCGCCATGCGGGTAACAGCGGCACTCGCGAAGTGGTGCTGGGGATGGCGCACCGCGGTCGTCTGAACGTGCTGATCAACGTACTGGGTAAAAAACCGCAGGATCTGTTCGACGAGTTTGCCGGTAAACATAAAGAACATCTGGGTACCGGCGACGTGAAGTATCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGTCTGGTTCACCTGGCGCTGGCGTTTAACCCATCGCATCTGGAAATTGTGAGCCCGGTGGTGATGGGCTCCGTGCGCGCCCGTCTGGACCGACTGGACGAACCGAGCAGTAATAAAGTGCTGCCGATCACTATTCACGGCGACGCCGCGGTGACCGGCCAGGGCGTGGTTCAG +>38 
+AAACGCTTCCTGAACGAACTGACCGCTGCAGAAGGGCTGGAACGTTATCTGGGGGCAAAATTCCCTGGCGCGAAACGTTTTTCGCTGGAAGGCGGCGATGCGTTAATTCCGATGCTCAAAGAGATGGTCCGCCATGCGGGCAACAGCGGCACCCGCGAAGTGGTGTTGGGAATGGCGCACCGTGGTCGCCTGAACGTACTGGTCAACGTGCTGGGTAAAAAACCTCAGGATCTGTTTGACGAGTTTGCCGGTAAACATAAAGAACATTTGGGCACCGGCGACGTGAAGTACCATATGGGTTTCTCGTCGGATATCGAAACCGAAGGCGGACTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTCAGCCCGGTAGTGATGGGGTCTGTGCGCGCACGTCTCGACCGGCTCGACGAACCGAGCAGCAACAAAGTGTTGCCAATCACCATTCATGGTGATGCAGCAGTTACCGGGCAGGGCGTGGTTCAG +>39 +AAACGCTTCTTAAGCGAACTGACCGCCGCTGAAGGCCTTGAACGTTACCTCGGCGCAAAATTCCCTGGCGCAAAACGCTTCTCGCTGGAAGGCGGTGACGCGTTAATCCCGATGCTTAAAGAGATGATCCGCCACGCTGGCAACAGCGGCACCCGCGAAGTGGTTCTCGGGATGGCGCACCGTGGTCGTCTGAACGTGCTGGTGAACGTGCTGGGTAAAAAACCGCAAGACTTGTTCGACGAGTTCGCCGGTAAACATAAAGAACACCTCGGCACGGGTGACGTGAAATACCACATGGGCTTCTCGTCTGACTTCCAGACCGATGGCGGCCTGGTGCACCTGGCGCTGGCGTTTAACCCGTCTCACCTTGAGATTGTAAGCCCGGTAGTTATCGGTTCTGTTCGTGCCCGTCTGGACAGACTTGATGAGCCGAGCAGCAACAAAGTGCTGCCAATCACCATCCACGGTGACGCCGCAGTGACCGGGCAGGGTGTGGTTCAG +>40 +AAACGCTTCCTCAGCGAACTGACTGCAGCGGAAGGTCTGGAACGCTACCTGGGCGCGAAATTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGTGATGCGTTAATCCCAATGCTCAAAGAGATGATCCGCCACGCCGGTAACAGCGGTACCCGTGAAGTGGTACTGGGTATGGCGCACCGTGGTCGTCTGAACGTCCTGGTTAACGTGCTGGGTAAAAAGCCGCAGGATCTATTCGACGAATTTGCGGGCAAACATAAAGAACACCTCGGTACCGGTGACGTGAAGTACCACATGGGCTTCTCATCGGATATCGAAACCGAAGGCGGTCTGGTGCATCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTTATCGGTTCCGTACGTGCACGCTTGGATCGTCTGGACGAGCCGAGCAGCAATAAAGTGCTGCCAATCACTATTCATGGTGATGCGGCAGTAACCGGGCAAGGCGTGGTTCAG +>41 
+CGTACTTTCCTTGAAGAGCTGACTGCCGCTGAAGGTTTAGAGCGCTATCTTGGTGCGAAATTCCCTGGTGCTAAACGTTTCTCTCTCGAAGGGGGGGATGCCTTAGTTCCGATGACCAAAGAGATGATCCGTCACGCGGGTGCCAGTGGCATGCGTGAAGTGGTGATTGGGATGGCGCACCGCGGTCGCTTGAACATGCTGGTCAACGTTCTGGGTAAAAAACCGCAAGATCTGTTTGATGAGTTTGCCGGTAAACATGGCGAAGGCTGGGGCACAGGTGATGTGAAATATCACCAAGGTTTCTCCGCTGACTTTGCGACACCGGGCGGTGATGTTCACTTAGCACTGGCTTTCAACCCATCGCATCTTGAGATTGTGAACCCTGTTGTGATGGGTTCAGTTCGCGCGCGTCAAGACCGCCTAGGTGATGAAGATGGCAGTAAAGTGCTACCTATCACTATCCATGGTGACTCTGCGATTGCCGGACAAGGTGTGGTGGCT +>42 +AAACGCTTCCTGAGCGAGCTGACCGCAGCCGAAGGCCTTGAGCGCTACCTGGGCGCGAAGTTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGCGACGCGCTGATCCCGATGCTGAAAGAGATGATTCGCCACGCGGGCAACAGCGGCACGCGTGAAGTGGTGCTGGGTATGGCGCACCGCGGTCGTCTTAACGTGCTGGTTAACGTGCTGGGTAAAAAACCGCAGGACCTGTTCGACGAGTTCGCGGGCAAACACAAAGAACACCTTGGCACCGGCGACGTGAAGTACCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGCCTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTAATTGGTTCGGTACGTGCCCGTCTGGATCGGCTGGACGAGCCGAGCAGCAACAAAGTACTGCCGATCACCATTCACGGCGACGCCGCGGTGACCGGTCAGGGCGTGGTTCAG +>43 +GTGCTGGGCCGTAATGGTTCCGACTATTCCGCCGCCGTGCTGGCCGCCTGTTTACGCGCTGACTGCTGTGAAATCTGGACTGACGTCGATGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGACTGCTGAAATCGATGTCCTACCAGGAAGCGATGGAACTCTCTTACTTCGGCGCCAAAGTCCTTCACCCTCGCACCATAACGCCTATCGCCCAGTTCCAGATCCCCTGTCTGATTAAAAATACCGGTAATCCGCAGGCGCCAGGAACGCTGATCGGCGCGTCCAGCGACGATGATAATCTGCCGGTTAAAGGGATCTCTAACCTTAACAACATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGGATGATTGGGATGGCGGCGCGTGTTTTCGCCGCCATGTCTCGCGCCGGGATCTCGGTGGTGCTCATTACCCAGTCCTCCTCTGAGTACAGCATCAGCTTCTGTGTGCCGCAGAGTGACTGC +>44 
+GTGCTGGGGCGTAACGGTTCCGACTATTCCGCTGCGGTACTGGCCGCCTGTTTACGCGCCGACTGTTGCGAAATCTGGACGGACGTTGACGGTGTGTATACCTGCGACCCGCGCCAGGTGCCGGATGCCAGACTGCTGAAGTCAATGTCCTATCAGGAAGCGATGGAACTTTCCTACTTCGGCGCCAAAGTGCTTCACCCGCGTACCATTACTCCCATCGCTCAATTCCAGATCCCATGTCTGATAAAAAATACCGGTAATCCGCAAGCGCCGGGCACGCTGATTGGCGCCAACAGCGATGAAGACGGGCTACCGGTAAAAGGCATCTCGAACCTCAATAATATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGCATGGTCGGGATGGCGGCGCGCGTGTTCGCCACCATGTCGCGTGCCGGGATTTCGGTAGTGCTGATCACCCAATCCTCTTCGGAGTACAGCATCAGCTTCTGCGTGCCGCCAAAGCGATGC +>45 +GTGCTGGGCCGTAACGGCTCCGATTATTCCGCCGCCGTACTGGCCGCCTGTTTACGCGCTGACTGTTGTGAAATCTGGACTGACGTCGACGGCGTGTATACCTGCGACCCGCGTCAGGTGCCAGACGCCAGGCTGCTGAAGTCGATGTCTTATCAGGAAGCAATGGAGCTTTCTTACTTCGGCGCTAAAGTACTACATCCGCGCACTATTACTCCTATTGCCCAGTTCCAGATCCCTTGTCTGATTAAAAATACCGGCAATCCACAAGCGCCCGGTACGCTGATCGGCGCTGCCAGCGACGATGATGCTCTGCCGGTTAAAGGGATTTCTCACCTTAACAACATGGCGATGTTTAGTGTCTCCGGTCCGGGGATGAAAGGCATGGTGGGTATGGCGGCGCGCGTTTTTGCCGCTATGTCACGTGCGGGAATCTCGGTGGTGTTGATCACGCAATCTTCATCTGAATACAGCATCAGCTTCTGCGTGCCGCAGAGCGACTGC +>46 +GTGCTGGGCCGCAACGGTTCTGATTACTCCGCTGCGGTGTTGGCTGCCTGCTTACGCGCCGACTGTTGTGAGATCTGGACTGACGTTGACGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGGTTGCTGAAGTCGATGTCCTATCAGGAGGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTCCTTCATCCTCGCACCATCACCCCCATTGCCCAGTTCCAAATCCCATGCCTGATTAAAAACACCGGAAACCCGCAGGCCCCTGGTACGCTGATCGGCGCCAGCGTGGATGAAGACGAACTGCCGGTGAAAGGGATCTCGAACCTGAACAATATGGCGATGTTCAGCGTTTCCGGCCCAGGAATGAAAGGGATGATCGGGATGGCGGCGCGCGTCTTCGCGGCAATGTCCCGCGCGGGGATCTCCGTGGTGCTGATCACGCAATCCTCTTCTGAATACAGCATCAGTTTCTGCGTACCGCAGGGCGACTGC +>47 
+GTGTTGGGGCGCAATGGCTCTGACTACTCTGCCGCTGTGCTGGCTGCCTGTTTACGCGCGGACTGTTGTGAGATCTGGACCGATGTCGACGGCGTATATACCTGCGATCCGCGCCAGGTACCCGATGCCCGACTGCTGAAGTCGATGTCTTATCAGGAAGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTTCTGCATCCGCGCACCATTACCCCAATTGCCCAGTTCCAGATCCCGTGCCTGATTAAAAATACCGGCAATCCACAAGCGCCTGGCACGTTGATCGGCGCCAGCAGTGATGAAGACGATTTGCCGGTAAAAGGTATTTCTAACCTCAATAACATGGCGATGTTTAGCGTCTCCGGCCCTGGAATGAAAGGCATGGTAGGCATGGCGGCGCGCGTTTTTGCCGCGATGTCGCGTGCGGGCATCTCGGTGGTGCTGATCACGCAGTCTTCTTCTGAATACAGCATCAGCTTCTGCGTTCCGCAGGGCGACTGC +>48 +GTATTAGGTCGCAATGGTTCAGACTACTCAGCTGCAGTATTAGCAGCCTGTTTACGTGCTAAATGCTGTGAAATTTGGACTGATGTTGACGGTGTTTATACTTGTGATCCACGTTTAGTGCCTGATGCACGTTTGTTAAAAGGCATGTCATATCAAGAGGCAATGGAACTGTCTTACTTTGGTGCCAAGGTACTTCATCCTCGTACAATTGCGCCTATTGCCCAATTCCAAATACCTTGTTTAATTAAAAATACGGGCAATCCAGATGCGCCGGGTACCTTGATTGGTGATGGTCAAAAAGATGAGAGCACACCTGTTAAAGGAATAACTAACCTTAATAATATGGCAATGATCAACGTATCTGGGCCTGGAATGAAAGGAATGGTAGGAATGGCGGCTCGCGTGTTCTCGGTAATGTCGAGAGCGGGGATTTCAGTTGTTCTAATCACACAGTCTTCTTCTGAATACAGCATTAGTTTTTGTGTGCCACAAAAAGAGCTG +>49 +GTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGCTGGCTGCCTGTTTACGCGCCGATTGTTGCGAGATTTGGACAGACGTTGACGGGGTCTATACCTGCGACCCGCGTCAGGTGCCCGATGCGAGGTTGTTGAAGTCGATGTCCTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCCCGCACCATTACCCCCATCGCCCAGTTCCAGATCCCTTGCCTGATTAAAAATACCGGAAATCCTCAAGCACCAGGTACGCTCATTGGTGCCAGCCGTGATGAAGACGAATTACCGGTCAAGGGCATTTCCAATCTGAATAACATGGCAATGTTCAGCGTTTCCGGCCCGGGGATGAAAGGAATGGTTGGCATGGCGGCGCGCGTCTTTGCAGCGATGTCACGCGCCCGTATTTCCGTGGTGCTGATTACGCAATCATCTTCCGAATACAGTATCAGTTTCTGCGTTCCACAAAGCGACTGT +>50 
+GTGCTCGGGCGCAACGGCTCCGATTATTCCGCAGCGGTACTGGCAGCGTGTTTACGCGCCGATTGTTGCGAGATCTGGACTGATGTCGATGGTGTCTATACCTGCGACCCACGTCAGGTACCGGATGCCCGATTACTTAAGTCGATGTCGTACCAGGAGGCTATGGAACTCTCCTATTTCGGCGCCAAAGTCCTCCATCCTCGAACCATCACTCCCATCGCCCAGTTCCAGATTCCCTGCCTGATAAAAAATACCGGAAACCCGCAAGCACCAGGAACGCTGATTGGCGCCAGCCGCGACGAAGATGATCTGCCGGTGAAGGGCATTTCAAATCTCAATAATATGGCGATGTTCAGCGTCTCCGGGCCGGGGATGAAGGGAATGGTCGGCATGGCTGCTCGCGTGTTTGCGGCAATGTCTCGCTCAGGAATTTCGGTAGTCCTGATTACGCAATCCTCCTCTGAGTACAGCATTAGCTTCTGTGTACCGCAGGCTGACTGT +>51 +GTGCTGGGGCGTAACGGCTCTGACTACTCCGCCGCCGTGCTGGCGGCCTGCTTACGCGCGGACTGCTGTGAGATCTGGACTGACGTCGACGGCGTTTATACCTGCGATCCGCGCCAGGTACCGGACGCCAGGCTGCTGAAGTCGATGTCGTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCGCGTACCATCTCCCCGATTGCCCAGTTCCAAATCCCTTGCCTGATTAAGAATACCGGTAACCCTCAGGCGCCGGGCACGCTGATTGGCGCCAGCGCGGATGAAGATGAACTGCCGGTGAAAGGCATTTCTAACCTCAATAACATGGCGATGTTCAGCGTCTCCGGCCCGGGGATGAAGGGCATGGTCGGCATGGCGGCACGCGTATTTGCCGCTATGTCCCGCAACGGGATCTCCGTGGTGCTGATCACGCAGTCTTCTTCCGAATACAGCATCAGCTTCTGCGTTCCGCAGGGTGATTGC +>52 +GTATTAGGCCGTAACGGTTCCGACTACTCCGCCGCCGTGCTGGCCGCGTGTTTGCGCGCCGACTGTTGTGAGATCTGGACTGACGTCGACGGCGTCTATACCTGCGACCCGCGCCAGGTGCCGGACGCCAGGCTGCTGAAGTCGATGTCGTATCAGGAAGCCATGGAACTCTCCTACTTCGGCGCTAAAGTTCTCCACCCCCGCACCATTGCCCCCATCGCCCAGTTCCAAATCCCCTGTCTGATCAAAAACACTGGTAACCCGCAAGCGCCAGGCACCCTGATCGGTGCCAGCAGCGATGAAGACGGCCTGCCGGTGAAGGGCATCAGTAACCTGAATAATATGGCGATGTTCAGCGTCTCTGGTCCGGGCATGAAAGGCATGGTGGGAATGGCGGCGCGCGTGTTCGCGGCGATGTCCCGTGCGGGCATCTCGGTGGTGCTGATCACCCAATCGTCTTCTGAATACAGCATCAGCTTCTGCGTGCCGCAGGCCGACAGC diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.ndb b/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.ndb new file mode 100644 index 0000000000000000000000000000000000000000..dfa7d2e267e27fdcac41d817c56044823912df6b GIT binary patch literal 20480 zcmeI%u}*_f6adgGnn+wQ#(ywwF7Ez{QB0atSk$?{)y4En{D3a@50nD0P>05(Iy5;9 zy!&{M%k5#hB_bVp8+{VEZl~uF`CS|BJbm@Y&P}7mw14ZFqjezx0t5&U 
zAV7cs0RjXF5FoI>z|eR9NB=)$AEK`2tS>!{C;j~AzWslp33bORK$56a26G33lxW~VPj%q;tg;sy%comRr(uEevFOhThS)()6H3s z6>seN|G!V$0yw3B1Wqd;f-?%p;H&~dIH!OV&MP2> z3kt~Lq5^`rq<|zYBhZn=6$K=5RRKv{Q$P~e6_CUY1tf7(0ZD8qAckiNjy+M5)T!S#HIq0c%*}tG zZ^sMi6f4nz(xlPXYgh@Z&oHb@XD#FU)&IfI*Y>wVT&>YxPr&nmV7~6!xPjbe;7A0gSS#c1h+7YcT!^%w=s_o zQs4wev51dSQy0duf=|-XQ@Dd?*pZH%#)SS9@{s-)!w2|D{!^f4PZ_)9Tf|rLTNJwk#BTooH4p9;?y@ZRlXt;Imt1zmfvc{$?uMIgx$TZacinU3 wz6Tz9Z*opT}eelsIpMCMwH{bp6(=WgMapr&Q3tcn?%>V!Z literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.nsq b/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.nsq new file mode 100644 index 0000000000000000000000000000000000000000..7fc5e3b8e6fafd8419a1bdda10966df4ce2396be GIT binary patch literal 6297 zcmZ9Mk3W-p|Hn6KrJ`R5o!jXgnzNjv&Ns@A(o}>_DwShXLu_kiQ(xyW9GXe`!NxHA zk)0e|rLs<^qAApUQm1oQ;>2k>gtg6b7dlbPee``_*Vg%-$Ndjn&pn^7_h&fU?yf3w z)y>@g_lZg&z0+@j-e8eXQ193s( zCt37>1RIlK%0=ZTQ{iTk#0hMf?Y1WEO+e(QDM92EsP-xy!p*!m9%CkAXH}HyS zDnQM^k(N%2$!BfaKI=wncT_Do{_gI*p>sGHGN5x(vKc%)LRK0R+F#`;{hW>R`#{;VsepQ=fwXjqR%F{|vn;5!`(c&qz}?-0p>s?bvI9KDzLYIY zjBWrrxM%LeO(^@}j`)3MXmc}(7TY2#76IH1`;o{kwn#>E-ZN}=i{q!fggrAM#JEh<0j{B0I+gw}{cx7nn>c1CW_g;XD z7TIpXEyII*Eji)l?l{;zL-q$#k;KYoFkyEkBB>J><5s3c3#Lj3*D5lgyn11GCX}z2 zG<;cg_rwF8hVG7_ba@}p#h5$LiF!sIxC4EN zQHaskxbkKxPXZl26=bl`gWYL2zVN7*D{meAS{kvGRCa(zV+vlsxu;jqY0H>L%qg3P zNJZ5uO|vOknyr0)DLVl^kLcd6dLpT>qqVSrZth^ZcYgrBIRF1%O?UIE04OnSn1nI zhcJ2wxc`klU3ePkJd|F6fIGdXGe&=BD+hH~Tg$TxE#+<1B}zl)D{08|WwQ8`NG1i{ zy{V5rizzVk!iKz$!tSX1J1CzKhQRJ=7WXl5KkQHJ!*;(9^yU*|*zVJXSC(Xy03CL( z0s6ML51#1lfd04L*@yhF-EE8D>iAzvLtN`*+t1TNuD^Z*?kd_WmO$$RyXThz9d+;O z4KY&<1zovVdAPd2=pmbXBT%}f{3D=$oMCZqL|U68qTeQT=Vmwu-KPv%iw!@9}5YY!4NxOSIe$xI?yRovV3!jW_SQsllP#(6je>&y{Z-=uku zb{ezgpnk(f)fP{WWQX$jw%hI6tyLw#C8iT!#@JOY5vMyzicZ`A4MyRy3dfo@APs~2 z-&Q*fAWY5RrGqFeL2keGJ*-QncHyL4i{uDKBsf_LYSwCw?eJtY#OgzK{v+(jaGZOE zk|A%`it9^4I(zfGD*^Wivt`r9MDiua 
z66B`&TI9yzoXKFKOk}oEutE;G^j(FH(=0UiAkXhJ-77THU2ki*)LA>;sFt;}{Gj9C z)o^=R?PF(x5Trlr7m8EI5~jTWQmnP(Ru_|ECPG$~^eT8R2_u&x*#cuRPO!;T=+c*N z{;+r2)2mw<5s589o@e&EmvxMnwrksKFSJ!^=^LTr3Ng!X-g}sH-p$$QXq(#2iW`Wu z|4mP)>wqs$6Mqfsn5Yp#N5xDpvP#mcNS+sy@&j}P825=pC#1*Au8&Am1$pYP zsVYKtQQJ|o5V`5wFR4dQJ{n1Jy7nFh7cxDNX2WD{H7kPD(zuLgaju3qru?HTDKCzn z=wxXGoBYAd6U@pL?Q>Y_OXz6xaDhqWQ1A0umD24ySR&g!_u0i+O}ud1gM=QooFtZ} z*G@?GcDD(dR?M9$XA@%o?QFN6>m3%>7>}^;73!DPar-sHMbZWA;^68D?dyEwH{tw} z+I{2UIy#?Gty-v0^hS17O$2~%UfF+F_W z{Mvo5!+)jocU4O-8bTK*9*c=%EGHgsQ%dj%m$Zf$8N+{k*yh8fC_Ow2qjzmjWr%G1 z?q^8S70KZNt%SFAC3fvnR?U>8zgsWB=p2XG7tVGRuJ`e<#wvumrBLr#kGhM&y^K*k zVNNf&ofaNFzczY&J?1XGSiAJBQMh>t!|SPB@2bKx*`#Zjdob%O!v-ZjFbSn+RXS4> z^dgVgTWoND5x%$;+)Kcng-l&og3;|KYP|`u;Ev;Z9|gLaT~w$~tmU2rx(dnz-Sk?F zmIwFraNU3S43=so!c{I6X2mhQYI5~56`sK$xf3F({Ko}Y`P@L-WT>|~z4Ev12@H`3 z{T5q@mQP5vdB%a=`@TXZazA}I{?CSa_EY6`4!e2IBq3J53So<({9o=TV0Tk`z7gC@ zYWZmx-9x&t2Cu9;95cxEI+d?iDA4jJx`94f%UnIMxin}M&EhUa-EH^Wq86t&@oZ+d z5}uhX?wKXu|2~duSYba^TSwRp<=12M0fcQ|sAp7h<1P;0L$8ieJ*E97-}oA(hXNh0 zuIhxB{+O~>7}L-2VjO)=mCk2!NtNA^F{6)(Hmi}%%G_1f@>cqNwsKN&F3dYM{29BF zv_%2sOZGaqQJUsK`8o%;tnL7Q+t>* z$eqDJZyxW(AgL5K;EpiSo6jY>pK^SIVbi$jg6zfUe8GdXKHrun8VHFhTHex}@r z$Zh+|b|w%um4;Pl3yAIoR)%e#2pB*6v{HA(?a~2bBATF>nOp74d(V(!j6e3&AJksAqmfo%@vF(+8onYfNKddqD6vk+k zM%z!Uq!Y&HqYUbNVA!D^zz|wIROlX-R%c+}X{DdL#Q z+t8{{wlTqGkvymodG7%XTh(FM@{qpk?~k5Dv+MinE46{!y% z&m&X3{jxq;bO%~3Q*ASR*G{aQ{U7Dc*Ywo?h_)7#TK9;ypMpKcOG4mwsq>R}RbiWB zd$_25%^Zayu>|!%H-}d0aUMm^995{{u700{R-Cn294B-SZv{nPZmgAi&}E4_n{_2& zjmU9~0q?_drasm)U1Uvv8O~I1c?vx2Of4q_S#+*ok2IbSHmQ`OJGHA%1`b9w>FT-- z4DL{Lri-)Z57#yNq7PoN4^!37Wpz!jaxHXKC`PZtjb}3PtrMBE*rJrV(MHOVVQNHD z5V3?R9Bw&jD&aNI3AgJBMxb{VjSN`mTb~63y=~>$*WBt=_&B7%LXXleI=QMd>POv~ zK0_k+5iJeq#gwahd&gJ9qOj%8H5U4}MAV&>Ll{ew13eM{U0UWW&fygMXyf%GI%-I! 
zh29J9Jk(u{(TfYfU5mO0_k^ey3))ckf$@(LkP0>C9@V>OFz}yIKmNh#o6vKILUa73 zK-U2MA?GmF$GPS#=DvqGqd>CHJCkrAQ!W@!Vp`~;6#H%94s_ZL;)NLDu=_a+{kQWd zeW|3VJwDG|htb>A;12XHn0upo3tAp?e;L*E2dDeiQ`XSqOc$U-dHWB*o&BkE4GHMU z2V;d^1ya8EOJZt{Mlhaf#J8rW%{n8BT^>CE^q7z&y-%r9I4s6?&zWYdC%lI8#YH1J zwEP!wr_O9t2MF#tdcUR)OvO7RS4KA7asTcC>^>OS|5c0bZ07`%JCpjqSNcVg>zcNE zA3Pi`$Ph~pc{iDE!0tq^d`UDZhY0S%_wMY&tfFYRI-NEI-5uyEaA!6=As8|D>5_c3 zd>ww?lHj-4?tfif#ou-Z_n(G)ZbA7T%$;&UU*u@Hx=ZNpnrAT%&GGG2hQ`Dgxy33_ z^{=iPX!wG6mp(u(Q`1~SGx}Er?2Z&BoJ{{V8`<_*wv*QJM#ZX~&7oac{83xyAG*UC zpSbGS%E)-piay%tveo-zr!7PRUUIw3Py5dj1y&>StA9#&G%{&LJ;Rl2c3%FCf5Ede z`-NWO;~u1WrX-+r!v&PiC}@dWT~*cM_Tnx*9;K6kekovgC(sY>2D(qSlhSdlYt`3l zL%Ra_*HQXJ-RbvaA4)gq z%j`rzk0DehfICKK6|`XPZr~nIy`+wG4bSLr2>7;BxZ(SQ;J*LU1Ag)k#(oP#-6v7^ z;)m-Q9B@~GI~jFf_qE0SdH3Zr{<9*QE&~0XH1!fM$-ZZ}#W!*Ful&z3x`c?jQwhzw z+f>6ZU*}9&SMD}9m@M@9Xoabozg|-kK&yLQQ^CWfnKJp7P`38 z`j@SaqfZ0(&Xn%*PoVrN)ID3V;W9I+sC!uDo9OohrT=MYlOXOvSD(opDhF<7urwz9 z$So#DPiSyUzDrM_mYs_tM_b*k^yL|Ta>ucqn7ih^yJ<1dB|4$Y6z#hAy8V0RSa%-+ z_a93!_k;#s(v}{S-iXn&4dW>N;xAf<=8tZNJ-v8m>Budkt*U=rm9E8Y4rs@aH5Yl(nmj8 zu|Ias${PO%$$hmzcla?6sZ=)i{M7JqlDKp7ci)W|{pGtC`gt~9xth1GaCNsc446AZ z+_HCF)kiIE^_JbE$j2tp@^JItqgS_odA1YPacmcM^Jdgt@Z0)eZ`WUk%cS7GhW4Ot z-L}3t3v00Qu)9MG>V8m?aETe!4&~P*`aX&N{8K1DK9A^rL(?P2u8w%YM1-3M`sylO z!xztizWvfU8aaAwv>_ll@>uH0^xc2Li~Gj!kz+>}N*~WaZ(b0!J{Vq{sZ1I#^zWnf z*R7-UqBq~k?XErJzs#Yb^i-v?Rk5Lw6+XA;U-UJJe&~yPnf;pcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.nto b/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/blast/nucleotide/nucleotide.nto new file mode 100644 index 
0000000000000000000000000000000000000000..ad19396e81aff427697a109c3c035ac73cb27f3f GIT binary patch literal 216 zcmXBFg${xM06;;oyHK$M6YTo`U-LHJc6+}dFSG(hN|dQkrAD0wO0 +KFRPGHADYTYHQKYGVRDYRGGGRSSARETAMRVAAGAIAKKYLQQEFGIEVRAYLSQMGDVAIDKVDWNEIENNDFFCPDVDKVAAFDELIRELKKEGDSIGAKIQVVATGVPVGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVRQKGSQHRDPLTPQG +>1 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKHGIVIQGCLTQMGDIPLEIKDWQQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVANGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>2 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGVEIRGCLTQMGDIPLEIKDWSQVELNPFFCPDPDKIEVLDELMRGLKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>3 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLKEKLGIEVRGYLSQLGPITCDLVDWSIVESNPFFCPDPSRLDALDEYMRALKKEGNSIGAKVTVVAQGVPAGFGEPVFDRLDADLAHALMSINAVKGIEIGDGFGVVTLKGTENRDEITKKG +>4 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGIEIRGCLTQMGDIPLEIKDWSQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKDG +>5 +VFRPGHADYTYEQKYGFRDYRGGGRSSARETAMRVAAGAIAKKYLQQKFGIVIRGCLSQMGDIPLAIKDWDQVELNPFFCADADKLDALDELMRGLKKEGDSIGAKVTVVADGVPAGWGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVKLRGSQNRDEITKAG +>6 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAQKFGVVIRGCLTQMGDIPLEIKDWDQVEQNPFFCPDPDKIEALDELMRALKKEGDSIGAKVTVVADSVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFGVVQLRGSQNRDEITTAG +>7 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>8 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>9 +MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>10 
+MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQPLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>11 +MEMVARVALVQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGERMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGQSLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>12 +MEMIARVTLTQPHDAGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGDSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>13 +MEMIARVALSLPHQAGATTVPARKFFDICRGLPEGAEIAVTLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCAMPVGQPLPNHSVIVPRKGVLELMRMLDGGDSPLRIQ +>14 +SALTENDLVFALSQHAVTFADAELQQQGKSWPSLPRYFAIGRTTALALHTVSGFNIHYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELIGETLTARGADVDFCECYQRSAKYYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>15 +AALGESDLLFALSQHAVAFAQSQLHQQDRKWPRLPTYFAIGRTTALALHTVSGQKILYPQDREISEVLLQLPELQNIAGKRALILRGNGGRELIGDTLTARGAEVTFCECYQRCAIHYDGAEEAMRWQSREVTTVVVTSGEMLQ +>16 +ATLTENDLVFALSQHAVAFAHAQLQRDGRNWPASPRYFAIGRTTALALHTVSGFDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRGRELLGETLTARGAEVSFCECYQRSAKHYDGAEEAMRWHTRGVTTLVVTSGEMLQ +>17 +AALTDNDLVFALSQHAVAFAHAQLQQQELDWPVQPRYFAIGRTTALALHTVNGCDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELLGKTLTERGAEVTFCECYQRSAKHYDGAEEAMRWHSRGVTTIVVTSGEMLQ +>18 +ETLGDNDLLFALSQHAVSFAHAQLQQQGLNWPSLPHYFAIGRTTALALHTVSGHKIRYPQDREISEVLLQLPELQSIAGKRALILRGNGGRELIGQTLTSRGADVTFCECYQRSAKHYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>19 +RLLQEGDLLFALSQHAVEFAHAQLQQHAVSWPHAPRYFAIGRTTALALHTASGIDVRYPLDREISEVLLQLPELQTIAGKRALILRGNGGRELLGETLRERGADVTFVECYQRCAKHYDGAEEAMRWHARGINTLVVTSGEMLQ +>20 +IAGCQKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADIARKVAEAVERQLAELPRAGTARQALSASRLIVTKDLAQCV +>21 +IAGCKKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGIAQNVAEAVERQLAELPRAETARQALSASRLIVTKDLAQCV +>22 
+IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGMASRVAEAVERQLAALPRAETARQALSASRLIVTRSLAQCV +>23 +IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGTESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPAADMARRVAEAVERQLAELPRAETARQALNASRLIVTKDLAQCV +>24 +IAGCQKVVLCSPPPIADEILYAAKLCGVQAIYKVGGAQAISALAFGTVSIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADMAKRVGDAVERQLADLPRAETARQALSASRLIVARDLDQCI +>25 +IAGCKKVVLCSPPPIADEILYAAQLCGVKEVFNVGGAQAIAALALGTESIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAKLAEGVAEAVERQLAELSRADTARQALSASRLIVAKDLAQCV +>26 +IAGCKKVVLCSPPPIADEILYAARLCGVQQVYQVGGAQAIAALAFGTETVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATTDFVASDLLSQAEHGPDSQVILLTPDSAMAQAVADAVERQLAELPRAETARQALAESRLIVARDLAQCV +>27 +SDWATMQFAAEIFDILDIPHHVEVVSAHRTPDKLFSFAENAEENGFQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLRD +>28 +SDWTTMQFAAEIFEILDVPHHVEVVSAHRTPDKLFSFAETAEENGYHVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLSN +>29 +SDWATMQFAAEILDILNVPHHVEVVSAHRTPDKLFSFAEDAESNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>30 +SDWATMQFAVEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKELHQRLNG +>31 +SDWATMQFAAEIFDILNVPHHVEVVSAHRTPDKLFSFAESAEEKGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLRQRLAD +>32 +SDWATMQFAAEIFEMLDVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKALHQRLSD +>33 +SDWATMSHAADVLDTLQIPYHVEIVSAHRTPDKLFSFAEKAKSNGFDVIIAGAGGAAHLPGMLAAKTLVPVFGVPVQSATLSGVDSLYSIVQMPKGIPVGTLAIGKAGAANAALLAAQVLALHSPAILDALTA +>34 +SDWATMQFAAEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYEVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLHQRLAE +>35 
+SDWATMQFAAETAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>36 +SDWATMQHAAEILDALDVPYHVEVVSAHRTPDKLFSFAESAQHNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDDALLARLAA +>37 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLINVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>38 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>39 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDFQTDGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>40 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>41 +RTFLEELTAAEGLERYLGAKFPGAKRFSLEGGDALVPMTKEMIRHAGASGMREVVIGMAHRGRLNMLVNVLGKKPQDLFDEFAGKHGEGWGTGDVKYHQGFSADFATPGGDVHLALAFNPSHLEIVNPVVMGSVRARQDRLGDEDGSKVLPITIHGDSAIAGQGVVA +>42 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>43 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDDDNLPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>44 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGANSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFATMSRAGISVVLITQSSSEYSISFCVPPKRC +>45 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGAASDDDALPVKGISHLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>46 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASVDEDELPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>47 
+VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>48 +VLGRNGSDYSAAVLAACLRAKCCEIWTDVDGVYTCDPRLVPDARLLKGMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPDAPGTLIGDGQKDESTPVKGITNLNNMAMINVSGPGMKGMVGMAARVFSVMSRAGISVVLITQSSSEYSISFCVPQKEL +>49 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDC +>50 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRSGISVVLITQSSSEYSISFCVPQADC +>51 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTISPIAQFQIPCLIKNTGNPQAPGTLIGASADEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRNGISVVLITQSSSEYSISFCVPQGDC +>52 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQADS diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/config.json b/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/config.json new file mode 100644 index 0000000..c79869d --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/config.json @@ -0,0 +1,12 @@ +{ + "db_name": "Locidex Database 2", + "db_version": "1.0.0", + "db_date": "04/04/2024", + "db_author": "test1", + "db_desc": "test1", + "db_num_seqs": 53, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" +} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/meta.json b/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/meta.json new file mode 100644 index 0000000..f3b88fa --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/meta.json @@ -0,0 +1,1181 @@ +{ + "info": { + "num_seqs": 53, + 
"is_cds": "True", + "trans_table": 11, + "dna_min_len": 220, + "dna_max_len": 350, + "dna_min_ident": 80, + "aa_min_len": 73, + "aa_max_len": 116, + "aa_min_ident": 80 + }, + "meta": { + "0": { + "seq_id": 0, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 609, + "dna_seq_len": 501, + "dna_seq_hash": "4811bc98591c74954ace3cb487330482", + "aa_seq_len": 167, + "aa_seq_hash": "a8fbcf8179d8548f980b7b15f29de1d4", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "1": { + "seq_id": 1, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 614, + "dna_seq_len": 501, + "dna_seq_hash": "b66979eaf680fab872ffe1bde4c092d6", + "aa_seq_len": 167, + "aa_seq_hash": "3e034a4d80ac27352822774abd9319df", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "2": { + "seq_id": 2, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 618, + "dna_seq_len": 501, + "dna_seq_hash": "f02a36ff6df05f9bf38428fa22a035da", + "aa_seq_len": 167, + "aa_seq_hash": "e2d30bb18231528ef65c34880704dd7a", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "3": { + "seq_id": 3, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + 
"locus_description": NaN, + "locus_uid": 619, + "dna_seq_len": 501, + "dna_seq_hash": "bee9d7360aa8e9b840fb29afa1de2c2e", + "aa_seq_len": 167, + "aa_seq_hash": "c3f71f5780b5f1031aaf21697a482ee3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "4": { + "seq_id": 4, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 620, + "dna_seq_len": 501, + "dna_seq_hash": "5b7956485455fdbc7c86d4834a8f7406", + "aa_seq_len": 167, + "aa_seq_hash": "60ce8f3b07f53378580ee528910ee623", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "5": { + "seq_id": 5, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 624, + "dna_seq_len": 501, + "dna_seq_hash": "98ba14aac74444a253123aff3d20c69f", + "aa_seq_len": 167, + "aa_seq_hash": "bab41702c7c209def93f9c9930c27086", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "6": { + "seq_id": 6, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 716, + "dna_seq_len": 501, + "dna_seq_hash": "6b9166d5d996897cae3cc288d7969d78", + "aa_seq_len": 167, + "aa_seq_hash": "5bc86c0a9226224922cbd6219c182622", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + 
"min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "7": { + "seq_id": 7, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "d401763f2df6e5fe87e1e07d3c170fe6", + "aa_seq_len": 167, + "aa_seq_hash": "928ad814483bbffda3e3b3a0aa4ca072", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "8": { + "seq_id": 8, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 120, + "dna_seq_len": 501, + "dna_seq_hash": "9c50d73cc4ef8d0a447f07ad150ad8cc", + "aa_seq_len": 167, + "aa_seq_hash": "928ad814483bbffda3e3b3a0aa4ca072", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "9": { + "seq_id": 9, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 555, + "dna_seq_len": 501, + "dna_seq_hash": "fab4f658dfba0cd0174a4a87998cf948", + "aa_seq_len": 167, + "aa_seq_hash": "a081905e659429db1f40e145932ae277", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "10": { + "seq_id": 10, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 557, + 
"dna_seq_len": 501, + "dna_seq_hash": "acb2ed027124e2a54b7734cd538590f1", + "aa_seq_len": 167, + "aa_seq_hash": "970184ec5ccc9f02ee3c858d2687cc18", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "11": { + "seq_id": 11, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 558, + "dna_seq_len": 501, + "dna_seq_hash": "ad996a122298d55ab3d4b2ea7a4974b0", + "aa_seq_len": 167, + "aa_seq_hash": "945455021fffea9b793d16af630db961", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "12": { + "seq_id": 12, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 563, + "dna_seq_len": 501, + "dna_seq_hash": "815242e67f31f4e2968f7f0620565125", + "aa_seq_len": 167, + "aa_seq_hash": "1b117ca76a022ae63d6f7bfe2ead289e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "13": { + "seq_id": 13, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 633, + "dna_seq_len": 501, + "dna_seq_hash": "532742ae95c046241789d79e68e30b7a", + "aa_seq_len": 167, + "aa_seq_hash": "fff51d2396f3da88a775416b4c6d14b6", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + 
"min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "14": { + "seq_id": 14, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 316, + "dna_seq_len": 432, + "dna_seq_hash": "3922f6256f2891400db415013eb0b208", + "aa_seq_len": 144, + "aa_seq_hash": "0af9d546dfcaf93373a8919df3e30323", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "15": { + "seq_id": 15, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 343, + "dna_seq_len": 432, + "dna_seq_hash": "f76c13e33ad5b502dfe64181dbdf2378", + "aa_seq_len": 144, + "aa_seq_hash": "32484f065f9013aaa5b3c694cc99cdbf", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "16": { + "seq_id": 16, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 472, + "dna_seq_len": 438, + "dna_seq_hash": "80bea3abd165ee14e51bc9e9779fc6a1", + "aa_seq_len": 146, + "aa_seq_hash": "4e9cc2d289f1c946738cc8e6e4ef1186", + "dna_min_len": 306, + "dna_max_len": 744, + "aa_min_len": 102, + "aa_max_len": 248, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "17": { + "seq_id": 17, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 489, + 
"dna_seq_len": 432, + "dna_seq_hash": "83a314185d9ff0bf7c2953d30979e7eb", + "aa_seq_len": 144, + "aa_seq_hash": "5f9fc3707789543f2f14b0f1a555a05c", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "18": { + "seq_id": 18, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 497, + "dna_seq_len": 432, + "dna_seq_hash": "c70622b317de74bdaf57eb8bb5134537", + "aa_seq_len": 144, + "aa_seq_hash": "56b3d46d3e517eb7f83f089f9ed5ae2a", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "19": { + "seq_id": 19, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 498, + "dna_seq_len": 432, + "dna_seq_hash": "f284b11b34de688e2ef54c1b73936595", + "aa_seq_len": 144, + "aa_seq_hash": "da558cdebd900031d0df8f58ef01454e", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "20": { + "seq_id": 20, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "9f762c246c542c52c94c5022ca62311c", + "aa_seq_len": 167, + "aa_seq_hash": "447381a0d286fa1037b5499e2242819a", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + 
"min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "21": { + "seq_id": 21, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 754, + "dna_seq_len": 501, + "dna_seq_hash": "65b434bea0d1939d2b748dbc5dd6df8b", + "aa_seq_len": 167, + "aa_seq_hash": "2b685aa7892794b69c9faa20c58a9183", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "22": { + "seq_id": 22, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 757, + "dna_seq_len": 501, + "dna_seq_hash": "eccfc35078428e44e5dd3e85d9ebf1fe", + "aa_seq_len": 167, + "aa_seq_hash": "35fa89ee4cd8689b89d553157471afe0", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "23": { + "seq_id": 23, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 759, + "dna_seq_len": 501, + "dna_seq_hash": "ce01d780cd0ffe3197f708d7048a473b", + "aa_seq_len": 167, + "aa_seq_hash": "bc0edd26ea6032cc4939e8cbc17a12d3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "24": { + "seq_id": 24, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 768, + "dna_seq_len": 501, + 
"dna_seq_hash": "23377e95fe00bf6a16b51fe8929a938a", + "aa_seq_len": 167, + "aa_seq_hash": "9fb34628ef67396ed38c755280e04f7e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "25": { + "seq_id": 25, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 838, + "dna_seq_len": 501, + "dna_seq_hash": "8478cdd016753651cd73afc4ad20c7df", + "aa_seq_len": 167, + "aa_seq_hash": "6512669779521a6792ecdae3088467f7", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "26": { + "seq_id": 26, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 907, + "dna_seq_len": 501, + "dna_seq_hash": "ab935d39fffeff601d95a8362ba454f3", + "aa_seq_len": 167, + "aa_seq_hash": "1c277aef51e883e29ee8b489c525ea1b", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "27": { + "seq_id": 27, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 24, + "dna_seq_len": 399, + "dna_seq_hash": "a7af783dc7084f1b8bc593aa29f80003", + "aa_seq_len": 133, + "aa_seq_hash": "46a0c532edb92303b1b9d12a80056a60", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + 
"min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "28": { + "seq_id": 28, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 48, + "dna_seq_len": 399, + "dna_seq_hash": "9fb313e6232b0d0e14d2fc4be7c409f7", + "aa_seq_len": 133, + "aa_seq_hash": "0e56efdd1f7fbaf132524616e29d98ca", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "29": { + "seq_id": 29, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 317, + "dna_seq_len": 399, + "dna_seq_hash": "50cd750e2f6860dd489040f1d5f64f9b", + "aa_seq_len": 133, + "aa_seq_hash": "18e887a66ce56a930dbf8db48b406596", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "30": { + "seq_id": 30, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 487, + "dna_seq_len": 399, + "dna_seq_hash": "0e1384e36f3897f65690f9230d2bcd73", + "aa_seq_len": 133, + "aa_seq_hash": "20c9a488aa6542257a151ced866d2f8f", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "31": { + "seq_id": 31, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": 
"phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 608, + "dna_seq_len": 399, + "dna_seq_hash": "e180fd1852382c132851674a9e379c03", + "aa_seq_len": 133, + "aa_seq_hash": "c7da76b50946241fe125348a19a9b6a3", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "32": { + "seq_id": 32, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 611, + "dna_seq_len": 399, + "dna_seq_hash": "0ec842f985e93041c928ab7bb137295d", + "aa_seq_len": 133, + "aa_seq_hash": "be3990f2abaa8780b14e62d4fc8cd82a", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "33": { + "seq_id": 33, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 612, + "dna_seq_len": 399, + "dna_seq_hash": "9d42e484ea2936f87312f07abf0ad84a", + "aa_seq_len": 133, + "aa_seq_hash": "7af624e3930c7a5ab7785b08d925081c", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "34": { + "seq_id": 34, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 619, + "dna_seq_len": 399, + "dna_seq_hash": "02949c6f858f3cc5de1b13c9f5a40705", + "aa_seq_len": 133, + 
"aa_seq_hash": "52d120d4090a22e450633e01e4ccb729", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "35": { + "seq_id": 35, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 631, + "dna_seq_len": 315, + "dna_seq_hash": "c4715d7df9a9eebfe5a334dd55ee469b", + "aa_seq_len": 105, + "aa_seq_hash": "31aa38918b303bf67374188e11413e59", + "dna_min_len": 220, + "dna_max_len": 535, + "aa_min_len": 73, + "aa_max_len": 178, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "36": { + "seq_id": 36, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 724, + "dna_seq_len": 399, + "dna_seq_hash": "782d08e7ee8a031a1402020e708bfbbc", + "aa_seq_len": 133, + "aa_seq_hash": "b5f9063808b8be839e7f169bf73c88e4", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "37": { + "seq_id": 37, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "481b6454f33fae7875b4978c14094ec3", + "aa_seq_len": 167, + "aa_seq_hash": "fa04457773c66ae015014e915af2516d", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + 
"min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "38": { + "seq_id": 38, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 30, + "dna_seq_len": 501, + "dna_seq_hash": "79048d21794195277a6af839be13e6e1", + "aa_seq_len": 167, + "aa_seq_hash": "186c53cb5c2bf0b7ecac853c6067065d", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "39": { + "seq_id": 39, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 281, + "dna_seq_len": 501, + "dna_seq_hash": "f10d273aa97d5556a43b96721d666975", + "aa_seq_len": 167, + "aa_seq_hash": "4172d5e8c8265884fe5479e10527cb02", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "40": { + "seq_id": 40, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 399, + "dna_seq_len": 501, + "dna_seq_hash": "1839775cc7c29412648ec7b004e1a417", + "aa_seq_len": 167, + "aa_seq_hash": "c4cfbbf5c5814829188f4f404f312bd3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "41": { + "seq_id": 41, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + 
"locus_uid": 571, + "dna_seq_len": 501, + "dna_seq_hash": "fce3e68952108e415579b3ad24a3f150", + "aa_seq_len": 167, + "aa_seq_hash": "43372b6526524f5ed4542be83b5b8614", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "42": { + "seq_id": 42, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 686, + "dna_seq_len": 501, + "dna_seq_hash": "629ea0cbfe0d2e9f34b1ca034a6c55fd", + "aa_seq_len": 167, + "aa_seq_hash": "c4cfbbf5c5814829188f4f404f312bd3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "43": { + "seq_id": 43, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "eaec644b411bd0b3ab1e086fbabd29c9", + "aa_seq_len": 167, + "aa_seq_hash": "bfe756f2f421db752907a171f3a44d69", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "44": { + "seq_id": 44, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 31, + "dna_seq_len": 501, + "dna_seq_hash": "97e4acce4e840b1c48de51f55fccf620", + "aa_seq_len": 167, + "aa_seq_hash": "be9296cb1ea9443fb43c0f967d107988", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 
116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "45": { + "seq_id": 45, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 208, + "dna_seq_len": 501, + "dna_seq_hash": "fbc6cb34cddfb1fe6a7806d5f7613259", + "aa_seq_len": 167, + "aa_seq_hash": "b788ec581475c9ba71d997b2db6e1def", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "46": { + "seq_id": 46, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 630, + "dna_seq_len": 501, + "dna_seq_hash": "ce58c0cacd4e8d9fa4867d11f2add864", + "aa_seq_len": 167, + "aa_seq_hash": "c062c5c88bdebdf2883e06fe6823c71c", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "47": { + "seq_id": 47, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 631, + "dna_seq_len": 501, + "dna_seq_hash": "949426df5430f94547459d06c786d77b", + "aa_seq_len": 167, + "aa_seq_hash": "dac50e2b5df83fe87c9826ecf99d568e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "48": { + "seq_id": 48, + 
"locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 632, + "dna_seq_len": 501, + "dna_seq_hash": "9a187a6b3e4675fe12ea213c7a23577c", + "aa_seq_len": 167, + "aa_seq_hash": "6536824faaa7880cfb44a6cd1ed057c9", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "49": { + "seq_id": 49, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 633, + "dna_seq_len": 501, + "dna_seq_hash": "7be8b9732228c1f82630b547d7011a5e", + "aa_seq_len": 167, + "aa_seq_hash": "1eac2cb94b8f619df1c9b0f3369f4a96", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "50": { + "seq_id": 50, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 637, + "dna_seq_len": 501, + "dna_seq_hash": "1895acdf991b49a885873fe82ce9ca85", + "aa_seq_len": 167, + "aa_seq_hash": "9fe9521d0bf495570a0fd425c0e48764", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "51": { + "seq_id": 51, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 638, + "dna_seq_len": 501, + 
"dna_seq_hash": "9776bbec78b5214d3dfca0d32b395d4b", + "aa_seq_len": 167, + "aa_seq_hash": "2914d167cc3579348e36d16afc628a39", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "52": { + "seq_id": 52, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 748, + "dna_seq_len": 501, + "dna_seq_hash": "6cf9d69644c819d9ecd3a0fd090977fc", + "aa_seq_len": 167, + "aa_seq_hash": "cf0168a601a4f5792c7326a2da650edb", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + } + } +} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/results.json b/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/results.json new file mode 100644 index 0000000..a59d695 --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_multiple/pass_two_db/results.json @@ -0,0 +1,14 @@ +{ + "analysis_start_time": "2024-04-04 14:12:12", + "parameters": { + "input_file": "locidex/example/build_db_mlst_in/senterica.mlst.txt", + "outdir": "/tmp/pytest-of-mwells/pytest-82/build0", + "name": "Locidex Database 3", + "db_ver": "1.0.0", + "db_desc": "", + "author": "", + "date": "", + "force": true + }, + "analysis_end_time": "2024-04-04 14:12:12" +} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.fasta b/locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.fasta new file mode 100644 index 0000000..a03cb89 --- /dev/null +++ 
b/locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.fasta @@ -0,0 +1,106 @@ +>0 +AAATTCCGTCCCGGACATGCGGACTACACCTATCACCAAAAATACGGTGTGCGAGATTACCGTGGCGGCGGCCGTTCATCGGCACGTGAAACCGCCATGCGTGTTGCTGCGGGAGCGATTGCCAAAAAATATCTGCAGCAAGAGTTTGGCATTGAAGTGCGTGCTTACTTGTCGCAAATGGGGGATGTCGCGATTGATAAAGTGGATTGGAATGAGATTGAAAACAACGATTTCTTCTGTCCTGATGTCGATAAAGTGGCTGCGTTTGACGAGCTGATCCGCGAGCTGAAAAAAGAAGGCGATTCGATCGGCGCGAAAATCCAAGTGGTCGCTACAGGCGTGCCGGTTGGACTGGGTGAGCCTGTGTTTGATCGCTTAGATGCGGATATTGCCCATGCCTTGATGAGCATCAACGCCGTGAAAGGAGTCGAGATTGGTGATGGCTTTGATGTGGTGCGCCAAAAAGGCAGCCAACACCGTGACCCGCTCACTCCACAAGGT +>1 +GTTTTCCGCCCGGGCCATGCCGACTATACCTACGAGCAGAAATACGGTCTGCGCGATTACCGTGGCGGCGGTCGTTCTTCCGCCCGTGAAACGGCGATGCGCGTCGCGGCTGGCGCGATTGCTAAAAAATATCTGGCGGAGAAACACGGCATCGTCATTCAGGGGTGTCTGACCCAGATGGGCGATATTCCGCTTGAAATCAAAGACTGGCAGCAGGTTGAACAAAACCCGTTTTTCTGTCCTGATCCAGATAAAATCGACGCGCTGGATGAACTGATGCGCGCCCTGAAGAAAGAGGGCGATTCGATTGGGGCAAAAGTGACCGTCGTGGCAAACGGCGTTCCGGCCGGGCTTGGCGAACCGGTCTTTGACCGTCTGGATGCGGACATCGCTCATGCGCTGATGAGCATCAACGCGGTAAAAGGCGTGGAGATTGGCGATGGGTTTGATGTGGTCGCGTTGCGAGGCAGCCAGAATCGCGATGAAATTACCAAAGAGGGC +>2 +GTTTTCCGTCCAGGACACGCTGACTATACCTATGAGCAGAAATATGGCCTGCGCGACTACCGTGGCGGCGGACGTTCATCCGCGCGTGAAACGGCGATGCGCGTTGCGGCTGGCGCGATTGCCAAAAAATATCTGGCGGAAAAATTCGGCGTTGAAATTCGCGGCTGTCTGACGCAGATGGGGGATATTCCGCTGGAGATCAAAGACTGGTCTCAGGTGGAGCTTAACCCGTTCTTTTGTCCAGACCCGGATAAAATCGAAGTGCTGGACGAACTGATGCGCGGGCTGAAGAAAGAGGGCGACTCCATCGGGGCAAAAGTGACCGTTGTTGCAAGCGGCGTACCGGCGGGTCTCGGCGAACCTGTATTCGACCGTCTGGATGCCGACATCGCCCATGCGCTGATGAGCATTAACGCCGTTAAGGGCGTTGAGATTGGCGACGGTTTTGACGTTGTTGCGCTGCGCGGCAGTCAGAACCGCGATGAGATCACCAAAGAAGGT +>3 
+GTTTTCCGCCCAGGGCATGCTGATTATACCTATGAACAAAAATATGGTTTGCGTGATTATCGTGGTGGTGGACGTTCTTCTGCTCGTGAAACGGCAATGCGTGTCGCCGCAGGTGCGATTGCTAAAAAATATCTAAAAGAGAAATTAGGCATCGAAGTTCGAGGATATCTTTCTCAGCTAGGACCTATTACATGTGATCTTGTTGATTGGTCTATTGTTGAAAGCAATCCATTTTTCTGTCCTGATCCTTCACGTTTAGATGCGCTTGATGAATACATGCGTGCACTTAAAAAAGAAGGTAATTCTATTGGTGCAAAAGTCACTGTGGTTGCACAGGGTGTACCTGCTGGATTTGGTGAACCTGTCTTTGATCGATTAGATGCTGATTTAGCGCATGCTTTGATGAGTATCAATGCTGTCAAAGGTATAGAAATTGGTGATGGATTTGGTGTTGTAACATTAAAAGGTACAGAAAACCGAGATGAAATCACTAAAAAGGGA +>4 +GTTTTCCGTCCAGGCCATGCCGATTACACCTACGAACAAAAATACGGTCTGCGCGATTATCGCGGCGGCGGGCGCTCTTCCGCCCGCGAAACCGCCATGCGCGTGGCGGCAGGGGCGATTGCAAAAAAATATCTCGCCGAGAAATTTGGCATTGAGATTCGCGGCTGCCTGACCCAGATGGGTGACATTCCGCTGGAAATCAAAGACTGGTCGCAGGTCGAGCAAAATCCGTTTTTCTGCCCGGACCCGGACAAAATCGACGCGTTAGATGAACTGATGCGCGCGCTGAAAAAAGAGGGCGACTCCATCGGCGCGAAAGTCACCGTTGTTGCCAGTGGCGTCCCCGCCGGACTTGGCGAGCCGGTCTTTGACCGCCTGGATGCCGACATCGCCCATGCGCTGATGAGCATCAACGCGGTGAAAGGCGTAGAAATTGGTGATGGTTTTGACGTGGTGGCGCTGCGTGGCAGCCAGAACCGCGACGAAATCACCAAAGACGGT +>5 +GTTTTCCGTCCTGGTCACGCCGACTATACCTACGAACAAAAATATGGCTTTCGCGACTATCGCGGCGGCGGGCGTTCTTCCGCGCGTGAAACCGCGATGCGCGTGGCGGCAGGGGCAATTGCCAAAAAATATCTCCAGCAGAAATTCGGCATCGTTATCCGCGGCTGTCTGTCCCAGATGGGCGACATTCCGCTGGCAATCAAAGACTGGGATCAGGTAGAGCTCAACCCGTTCTTCTGCGCCGATGCCGACAAGCTGGACGCGCTGGATGAGCTGATGCGTGGCCTGAAAAAAGAGGGCGACTCCATTGGTGCGAAAGTCACCGTGGTGGCCGACGGCGTGCCGGCTGGCTGGGGCGAGCCGGTATTTGACCGCCTTGACGCCGACATCGCCCACGCGCTGATGAGCATCAACGCGGTGAAAGGCGTCGAAATCGGCGACGGTTTTGACGTGGTCAAGCTTCGCGGCAGCCAGAACCGCGACGAAATCACGAAGGCGGGT +>6 
+GTGTTCCGTCCGGGGCACGCGGATTACACCTACGAACAAAAATACGGCCTGCGCGACTATCGCGGCGGCGGGCGTTCATCCGCCCGTGAAACCGCCATGCGCGTCGCGGCAGGCGCTATCGCCAAAAAATATCTGGCGCAGAAATTCGGCGTGGTGATTCGCGGCTGCCTGACCCAGATGGGTGATATTCCGCTGGAAATCAAAGACTGGGATCAGGTAGAGCAAAACCCGTTCTTCTGCCCGGACCCGGATAAAATCGAGGCGCTGGATGAGCTGATGCGCGCTCTGAAAAAAGAGGGCGATTCCATCGGCGCGAAAGTCACCGTGGTGGCCGACAGCGTGCCCGCCGGGCTTGGCGAGCCGGTATTTGACCGCCTGGACGCCGATATCGCCCACGCGCTGATGAGCATTAACGCCGTGAAGGGCGTGGAAATCGGCGACGGTTTCGGCGTGGTGCAACTGCGCGGCAGCCAGAACCGCGACGAAATCACCACTGCCGGT +>7 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAGCCAGGCGCCACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGCGGCCTGCCGGAGGGCGCGGAGATTGCCGTTCAGTTGGAAGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGCCGCTTCTCGCTGTCTACGCTGCCTGCCGCCGATTTCCCGAATCTTGACGACTGGCAAAGCGAAGTTGAATTTACGCTGCCGCAGGCCACGATGAAGCGCCTGATTGAAGCGACCCAGTTTTCGATGGCCCATCAGGATGTGCGCTACTACTTAAACGGTATGCTGTTTGAAACGGAAGGTAGCGAACTGCGCACTGTTGCGACCGACGGCCACCGTCTGGCGGTGTGCTCAATGCCGCTGGAGGCGTCTTTACCTAGCCACTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGTGGCGAAAACCCGCTGCGCGTGCAG +>8 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAACCCGGCGCTACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCCTGCCGGAAGGGGCGGAAATCGCCGTTCAGCTGGAGGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGTCGCTTTTCGCTGTCTACCTTACCGGCAGCAGACTTCCCGAATCTGGATGACTGGCAAAGCGAAGTGGAATTCACGCTGCCTCAGGCGACGATGAAACGCTTGATTGAGGCCACCCAGTTTTCGATGGCCCATCAGGACGTGCGCTACTACCTGAACGGTATGTTGTTTGAAACGGAAGGAAGCGAACTGCGCACCGTCGCGACCGACGGCCACCGTCTGGCGGTCTGTTCAATGCCGCTGGAGGCCTCTTTACCGAGCCATTCAGTGATCGTACCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTTGACGGCGGTGAAAATCCACTGCGTGTACAG +>9 
+ATGGAAATGGTGGCGCGCGTTGCGTTGATTCAGCCTCATGAACCAGGCGCAACTACCGTCCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCTTGCCGGAAGGGGCTGAAATTGCCGTCCAGCTGGAAGGCGATCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTCTCGCTTTCCACGCTGCCTGCCGCCGATTTCCCTAATCTGGATGACTGGCAGAGCGAAGTCGAATTCACCCTGCCGCAGGCAACGATGAAGCGCCTGATTGAAGCCACCCAGTTCTCAATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAGACTGAAGGTGAAGAGTTGCGTACCGTCGCGACCGACGGTCACCGTCTGGCGGTCTGCTCTATGCCGGTCGGGCAATCTCTGCCTAACCATTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAGCTGATGCGTATGCTCGACGGCGGCGAAACCCCGCTGCGCGTACAG +>10 +ATGGAGATGGTGGCGCGCGTGGCGCTGATCCAGCCTCATGAACCTGGTGCGACCACCGTTCCGGCGCGTAAATTCTTCGATATTTGCCGTGGATTACCAGAAGGGGCGGAAATTGCCGTTCAACTGGAAGGCGACCGTATGCTGGTGCGTTCTGGCCGCAGCCGTTTCTCGCTGTCTACGCTGCCTGCCGCCGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTCGAATTCACCCTGCCACAGGCGACAATGAAGCGCCTGATTGAAGCCACGCAGTTTTCGATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAAACCGAAGGGGAAGAGTTGCGTACCGTGGCGACCGACGGTCACCGCCTGGCGGTCTGTTCAATGCCTGTCGGTCAGCCGTTGCCTAGCCATTCGGTGATCGTACCGCGTAAAGGTGTGATTGAACTGATGCGTATGCTCGACGGCGGCGATAACCCGCTGCGCGTGCAG +>11 +ATGGAAATGGTGGCACGCGTTGCGCTGGTTCAGCCGCACGAACCAGGGGCGACGACCGTTCCAGCGCGCAAATTCTTTGATATCTGCCGTGGTCTGCCTGAAGGCGCGGAAATTGCCGTGCAGCTGGAAGGTGAGCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTTTCGCTGTCTACCCTGCCAGCGGCGGATTTCCCGAATCTCGATGACTGGCAGAGCGAAGTCGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCGACCCAGTTTTCTATGGCGCATCAGGACGTTCGCTATTACTTAAACGGTATGCTGTTTGAAACCGAAGGTGAAGAACTGCGCACCGTGGCGACCGACGGCCACCGTCTGGCAGTCTGTTCAATGCCAATTGGTCAATCTTTGCCAAGCCATTCGGTGATCGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGCGGCGACAATCCGCTGCGCGTGCAG +>12 
+ATGGAAATGATCGCGCGCGTTACGCTGACTCAGCCGCACGACGCGGGCGCGACCACGGTTCCGGCACGTAAATTCTTTGATATTTGCCGTGGGCTGCCGGAAGGCGCTGAAATCGCAGTGCAGCTGGAGGGCGACCGCATGCTGGTGCGCTCTGGCCGCAGCCGTTTCTCCCTCTCCACGTTGCCCGCTGCGGACTTCCCGAACCTGGATGACTGGCAGAGCGAAGTTGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCCACGCAGTTCTCCATGGCGCATCAGGACGTTCGTTACTACTTAAACGGCATGCTGTTCGAAACCGAAGGTGAAGAGCTGCGTACCGTGGCGACCGACGGTCACCGTCTGGCGGTTTGTTCCATGCCGATTGGCGATTCACTGCCAAACCATTCGGTGATCGTACCGCGTAAAGGCGTAATTGAACTGATGCGTATGCTCGACGGCGGTGAAACGCCGCTGCGCGTGCAG +>13 +ATGGAGATGATCGCGCGTGTGGCGCTGTCGCTACCGCACCAGGCGGGCGCGACCACCGTGCCGGCGCGCAAATTCTTCGATATCTGCCGTGGCTTGCCGGAAGGGGCGGAAATCGCCGTTACGCTGGAAGGCGACAGAATGCTGGTGCGCTCCGGGCGCAGCCGCTTCTCGCTGTCTACGTTACCGGCGGCAGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTGGAGTTCACGCTCCCGCAGGCCACCATGAAGCGCCTGATCGAAGCGACCCAGTTCTCCATGGCCCATCAGGACGTGCGGTATTACCTGAACGGGATGCTGTTTGAAACCGAAGGCGAAGAGCTGCGCACCGTGGCGACTGACGGCCACCGTCTGGCGGTATGCGCGATGCCGGTAGGCCAACCGCTGCCAAACCATTCGGTGATTGTACCGCGTAAAGGCGTGCTGGAGCTGATGCGTATGCTCGATGGCGGCGACAGCCCGCTGCGCATTCAG +>14 +TCGGCGCTGACGGAAAACGATCTGGTCTTCGCCCTCTCGCAGCACGCCGTCACCTTTGCAGATGCCGAGCTTCAGCAACAAGGGAAAAGCTGGCCCTCCCTTCCGCGTTATTTTGCCATTGGTCGCACAACGGCGCTGGCGCTGCATACCGTTAGCGGTTTCAATATTCACTACCCTCTGGATCGGGAAATTAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGCGCGCTTATATTACGCGGCAATGGTGGCCGTGAGCTGATAGGTGAAACCCTGACAGCACGCGGAGCTGATGTCGATTTTTGTGAATGTTATCAACGCAGTGCAAAATATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCTCGTGGTGTGACCACGGTGGTTGTCACCAGCGGAGAGATGCTACAA +>15 +GCGGCGCTGGGGGAGAGCGATCTGTTGTTTGCCCTCTCGCAACACGCGGTTGCTTTTGCCCAATCACAGCTGCATCAGCAAGATCGTAAATGGCCCCGACTACCTACTTATTTCGCCATTGGACGCACCACCGCACTGGCGCTACATACCGTAAGCGGACAGAAGATTCTCTACCCGCAGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGTGCGCTGATATTACGTGGCAATGGCGGTCGTGAGCTAATTGGGGATACCCTGACGGCGCGCGGTGCTGAGGTCACTTTTTGTGAATGTTATCAACGATGCGCAATCCATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCCCGCGAGGTGACGACGGTCGTTGTTACCAGCGGTGAAATGTTGCAG +>16 
+GCGACGTTGACGGAAAACGATCTGGTTTTTGCCCTTTCACAGCACGCCGTCGCCTTTGCCCACGCCCAACTCCAGCGAGATGGTCGAAACTGGCCTGCGTCGCCGCGCTATTTCGCGATTGGTCGCACCACGGCGCTCGCCCTTCATACCGTTAGCGGGTTCGATATTCGTTATCCATTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGCGCGCTGATTTTGCGTGGCAATGGCGGTCGCGGTCGCGAACTGCTGGGCGAAACCCTGACAGCTCGCGGAGCCGAAGTCAGTTTTTGTGAATGTTATCAACGAAGTGCGAAACATTACGATGGCGCAGAAGAGGCGATGCGCTGGCACACTCGCGGCGTAACGACGCTTGTTGTCACCAGCGGCGAGATGTTGCAA +>17 +GCGGCGCTCACGGACAACGATCTGGTGTTCGCCCTCTCGCAACACGCCGTCGCCTTTGCCCACGCCCAACTGCAACAGCAGGAGCTGGACTGGCCTGTGCAACCACGCTACTTCGCCATCGGGCGCACAACGGCGCTGGCGCTGCATACCGTTAACGGATGCGATATTCGCTATCCTCTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGAGCGCTTATTTTACGGGGCAACGGCGGGCGTGAACTGTTAGGCAAAACCCTCACAGAACGCGGCGCTGAAGTCACCTTTTGTGAATGTTATCAACGCAGTGCAAAACATTACGATGGCGCGGAAGAGGCGATGCGCTGGCACTCTCGCGGCGTGACGACGATTGTTGTCACCAGCGGCGAAATGCTGCAA +>18 +GAAACACTTGGCGATAACGATCTGCTCTTTGCACTTTCTCAACATGCAGTGTCATTCGCCCATGCGCAGTTGCAACAGCAGGGGCTAAACTGGCCATCACTTCCGCATTATTTCGCTATTGGCCGTACTACCGCTCTCGCCCTGCACACCGTAAGCGGACATAAGATTCGCTATCCACAAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCGGAATTACAAAGTATTGCGGGAAAACGCGCACTTATTTTGCGCGGTAACGGCGGCCGTGAATTGATCGGTCAGACGCTGACATCACGTGGTGCCGACGTTACTTTTTGTGAATGTTATCAACGCAGTGCGAAGCATTACGATGGTGCGGAAGAAGCTATGCGCTGGCAGTCTCGCGGCGTAACAACCGTCGTTGTAACCAGCGGTGAAATGCTGCAA +>19 +CGTCTCTTGCAGGAAGGCGATCTGCTCTTTGCGCTGTCGCAGCATGCCGTGGAGTTTGCCCATGCGCAGCTGCAACAGCATGCCGTTAGCTGGCCTCACGCCCCCCGCTATTTCGCCATCGGGCGCACCACGGCGCTGGCCTTACATACCGCGAGCGGAATCGATGTTCGTTACCCGTTAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAACCATTGCCGGAAAGCGCGCGCTCATTTTGCGCGGCAACGGTGGCCGCGAACTGCTGGGCGAAACGCTGCGCGAACGCGGCGCAGACGTGACGTTTGTGGAGTGCTATCAGCGCTGTGCGAAACACTATGATGGCGCGGAAGAAGCAATGCGCTGGCACGCCCGCGGTATTAATACGCTGGTGGTCACCAGCGGTGAAATGTTACAA +>20 
+ATTGCGGGATGCCAGAAGGTGGTTCTGTGCTCGCCGCCACCCATCGCTGATGAAATCCTCTATGCGGCGCAACTGTGTGGCGTGCAGGAAATCTTTAACGTCGGCGGCGCGCAGGCGATTGCCGCTCTGGCCTTCGGCAGCGAGTCCGTACCGAAAGTGGATAAAATTTTTGGCCCCGGCAACGCCTTTGTAACCGAAGCCAAGCGTCAGGTCAGCCAGCGTCTCGACGGCGCGGCTATCGATATGCCAGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCAACACCGGATTTCGTCGCTTCTGACCTGCTCTCCCAGGCTGAGCACGGCCCGGATTCCCAGGTGATCCTGCTGACGCCGGATGCTGACATTGCCCGCAAGGTGGCGGAGGCGGTAGAACGTCAACTGGCGGAACTGCCGCGCGCGGGCACCGCCCGGCAGGCCCTGAGCGCCAGTCGTCTGATTGTGACCAAAGATTTAGCGCAGTGCGTC +>21 +ATTGCCGGATGCAAAAAAGTGGTGTTGTGCTCGCCACCGCCTATCGCGGATGAAATCCTTTACGCTGCGCAGCTGTGCGGCGTGCAGGAAATCTTCAACGTCGGCGGCGCCCAGGCCATTGCCGCTCTGGCGTTCGGCAGCGAATCCGTGCCAAAAGTGGACAAAATTTTTGGCCCCGGCAACGCGTTTGTCACCGAGGCGAAACGCCAGGTCAGCCAGCGTCTCGACGGCGCGGCAATTGATATGCCTGCCGGCCCTTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCCACGCCAGATTTCGTGGCGTCTGACCTGCTCTCTCAGGCGGAACACGGCCCGGATTCTCAGGTCATCCTGCTGACCCCGGATGCCGGTATTGCGCAGAACGTCGCAGAGGCCGTCGAACGCCAGTTAGCGGAGTTACCGCGTGCAGAAACGGCGCGTCAGGCATTAAGCGCCAGCCGTCTGATCGTGACGAAAGACTTAGCCCAGTGCGTC +>22 +ATTGCAGGCTGTAAAAAAGTGGTGTTGTGCTCTCCCCCACCTATCGCCGATGAAATTCTGTATGCTGCGCAGCTCTGCGGCGTACAGGATGTGTTTAACGTTGGGGGCGCACAAGCTATTGCCGCGCTGGCATTTGGCAGTGAATCCGTGCCGAAAGTGGACAAAATTTTTGGCCCCGGTAATGCCTTTGTGACCGAAGCCAAACGTCAGGTGAGTCAGCGTCTGGACGGCGCCGCCATCGATATGCCAGCAGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCCACGCCGGATTTCGTTGCCTCTGACTTACTCTCGCAGGCCGAACACGGCCCCGATTCCCAAGTGATCCTGCTGACGCCGGATGCCGGTATGGCCAGCCGGGTTGCTGAAGCAGTAGAACGCCAGCTTGCAGCGCTGCCACGCGCTGAAACCGCGCGGCAGGCGTTAAGCGCCAGTCGTCTGATTGTCACCCGCTCCCTTGCGCAATGCGTA +>23 
+ATTGCGGGCTGTAAAAAAGTGGTGCTGTGCTCACCGCCGCCGATTGCCGATGAGATCCTTTACGCGGCGCAGCTGTGCGGTGTGCAGGACGTGTTTAACGTCGGCGGCGCACAGGCCATTGCCGCGCTGGCGTTTGGTACAGAATCCGTGCCGAAAGTGGACAAAATCTTCGGGCCAGGTAACGCCTTTGTCACCGAGGCAAAACGTCAGGTGAGCCAGCGTCTGGACGGTGCGGCGATCGATATGCCCGCAGGCCCGTCGGAAGTGCTGGTGATTGCTGACAGCGGCGCAACGCCGGATTTCGTGGCTTCTGATTTGCTCTCCCAGGCTGAACACGGCCCGGACTCTCAGGTGATTTTACTGACGCCCGCTGCTGATATGGCGCGTCGCGTAGCCGAAGCTGTCGAACGCCAGCTGGCAGAACTGCCGCGAGCTGAAACCGCCCGCCAGGCACTGAACGCCAGCCGCCTGATCGTGACTAAAGATTTAGCGCAGTGCGTG +>24 +ATTGCCGGTTGTCAGAAGGTGGTGCTCTGCTCTCCTCCACCGATCGCCGATGAGATCCTGTACGCGGCGAAGCTGTGCGGCGTGCAGGCGATCTATAAAGTGGGCGGTGCGCAGGCGATTTCTGCCCTGGCGTTCGGAACAGTATCCATTCCTAAGGTCGACAAAATCTTTGGCCCGGGCAATGCCTACGTGACCGAGGCGAAGCGCCAGGTCAGCCAGCGTCTGGACGGCGCGGCGATTGATATGCCTGCCGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCTACACCGGATTTCGTGGCCTCTGACCTGCTCTCGCAGGCCGAGCACGGCCCTGACTCGCAGGTGATTTTACTGACGCCAGATGCCGACATGGCAAAACGCGTGGGCGACGCCGTTGAGCGTCAGCTGGCTGACCTGCCGCGGGCGGAAACGGCGCGTCAGGCGCTATCCGCCAGCCGCCTGATTGTGGCCCGCGATCTTGACCAGTGCATC +>25 +ATCGCCGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTACGCCGCGCAACTCTGTGGCGTGAAAGAAGTGTTTAACGTGGGTGGCGCACAGGCCATTGCCGCGCTGGCGCTGGGCACGGAGTCTATTCCAAAAGTCGATAAAATCTTTGGGCCGGGCAACGCCTATGTGACCGAAGCCAAGCGCCAGGTCAGCCAGCGTCTTGACGGCGCGGCAATCGATATGCCCGCCGGACCGTCCGAAGTATTGGTTATCGCCGACAGCGGCGCAACGCCGGATTTTGTCGCCTCCGACCTGCTTTCTCAGGCCGAGCACGGCCCAGACTCGCAGGTGATCCTGCTGACGCCGGACGCTAAGCTTGCCGAGGGCGTGGCCGAAGCCGTTGAACGCCAGCTCGCCGAGCTGTCCCGCGCCGACACCGCGCGTCAGGCGCTCTCCGCCAGCCGTTTAATCGTAGCGAAAGATCTGGCGCAGTGCGTG +>26 
+ATCGCGGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTATGCGGCGCGTTTGTGCGGGGTACAGCAGGTCTATCAGGTGGGCGGCGCTCAGGCCATCGCGGCGCTGGCGTTTGGCACCGAGACCGTACCCAAAGTGGACAAAATCTTCGGGCCGGGCAATGCGTTTGTCACCGAAGCCAAACGTCAGGTCAGCCAGCGGCTGGATGGCGCGGCGATTGATATGCCTGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGATAGCGGCGCGACCACGGATTTCGTGGCCTCGGATTTGCTGTCCCAGGCGGAACACGGCCCGGATTCGCAGGTGATCCTGCTGACACCGGACAGCGCCATGGCGCAGGCGGTGGCCGACGCGGTTGAGCGTCAACTCGCCGAACTGCCGCGCGCGGAAACAGCTCGCCAGGCGCTGGCGGAAAGCCGCCTGATTGTGGCGCGCGATTTAGCGCAGTGCGTG +>27 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAATTTTTGACATTCTGGATATTCCGCACCATGTCGAAGTGGTTTCTGCTCACCGTACCCCCGATAAACTGTTCAGCTTTGCCGAAAATGCTGAAGAAAACGGCTTTCAGGTAATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCAGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTTGGCGTTCCGGTACAAAGCGCTGCGCTAAGCGGTGTGGACAGTCTCTATTCTATTGTACAGATGCCGCGCGGTATTCCGGTTGGCACACTGGCCATCGGCAAAGCTGGCGCCGCTAACGCGGCGCTGCTGGCGGCGCAAATTCTGGCCACCCACGATAACGCACTGCATCAGCGCCTTCGCGAC +>28 +AGCGACTGGACTACCATGCAATTCGCCGCCGAAATTTTTGAAATTCTGGATGTTCCGCACCATGTAGAAGTGGTTTCCGCCCATCGAACCCCTGATAAACTGTTCAGCTTCGCCGAAACGGCGGAAGAGAACGGATATCACGTGATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACATTGGTGCCGGTACTCGGCGTTCCGGTACAAAGCGCAGCATTAAGCGGTGTGGATAGCCTTTACTCCATTGTTCAGATGCCGCGTGGCATTCCGGTGGGTACACTGGCTATCGGCAAAGCCGGGGCTGCGAACGCCGCGCTGCTGGCAGCGCAAATTTTGGCCACACACGATAATGCGCTGCACCAGCGCCTGAGCAAC +>29 +AGCGACTGGGCTACCATGCAGTTCGCCGCAGAAATCCTCGATATTCTGAACGTACCTCACCATGTTGAAGTGGTTTCCGCCCACCGCACGCCCGATAAACTGTTCAGCTTCGCCGAAGACGCCGAAAGCAACGGTTATCAGGTGATTATTGCCGGTGCCGGCGGCGCTGCGCACTTACCCGGAATGATTGCCGCCAAAACGCTGGTCCCGGTATTAGGTGTACCCGTCCAGAGCGCCGCATTAAGCGGTGTCGATAGCCTCTACTCCATCGTGCAGATGCCGCGCGGCATTCCGGTCGGTACGCTGGCGATCGGTAAAGCCGGTGCCGCTAACGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>30 
+AGCGACTGGGCTACCATGCAGTTCGCCGTCGAAATCTTCGAAATCCTGAATGTCCCGCACCACGTTGAAGTGGTTTCTGCTCACCGCACCCCCGATAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTATCAGGTGATTATTGCGGGCGCAGGCGGCGCAGCGCACCTGCCAGGCATGATTGCCGCCAAAACGCTGGTGCCGGTGCTGGGCGTGCCAGTACAGAGCGCCGCACTGAGCGGTGTCGATAGCCTCTACTCCATCGTACAAATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATTGGTAAAGCTGGCGCGGCAAACGCGGCATTACTGGCAGCACAAATTCTCGCGACTCACGATAAAGAGCTACACCAGCGTCTGAATGGC +>31 +AGCGACTGGGCTACCATGCAGTTTGCCGCCGAAATCTTCGATATCCTGAACGTTCCACACCACGTTGAAGTGGTTTCCGCACACCGCACCCCCGATAAGCTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAAGGGTTATCAGGTGATTATTGCCGGTGCTGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTGGGCGTGCCGGTGCAAAGCGCTGCGCTGAGCGGCGTGGACAGCCTCTACTCTATCGTCCAGATGCCGCGCGGCATTCCGGTCGGCACGCTGGCGATCGGCAAAGCGGGCGCGGCGAACGCGGCGTTACTGGCAGCGCAAATTCTGGCGACACACGATAAAGACCTGCGCCAACGTCTGGCGGAC +>32 +AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTCGAAATGCTGGACGTTCCGCACCATGTTGAAGTCGTCTCAGCCCACCGTACCCCTGATAAACTGTTCAGCTTCGCCGAAAGCGCTGAAGAAAACGGTTATCAGGTTATTATTGCGGGTGCTGGCGGTGCAGCGCATCTGCCGGGCATGATTGCAGCGAAAACGCTGGTCCCCGTGTTAGGCGTTCCGGTACAAAGCGCAGCGTTGAGCGGCGTAGATAGCCTCTACTCAATCGTGCAGATGCCACGCGGCATCCCCGTGGGTACGCTGGCGATTGGGAAAGCGGGTGCGGCAAATGCGGCCCTGCTGGCAGCACAAATTCTGGCAACACACGACAAAGCATTACATCAGCGTCTGAGCGAC +>33 +AGTGACTGGGCAACCATGTCTCATGCCGCAGATGTATTAGATACACTACAAATTCCTTACCATGTTGAGATTGTCTCTGCACACCGAACCCCTGATAAGTTATTTAGTTTTGCTGAAAAAGCAAAAAGTAATGGCTTTGATGTCATTATTGCTGGTGCAGGAGGAGCTGCCCATTTACCAGGAATGCTTGCAGCTAAAACGTTAGTACCCGTATTTGGTGTTCCTGTTCAAAGTGCGACATTAAGCGGTGTTGATAGCCTCTATTCAATCGTACAAATGCCAAAAGGTATCCCTGTAGGAACCTTAGCGATTGGTAAAGCAGGGGCTGCCAATGCGGCTTTATTAGCGGCTCAAGTTTTAGCGTTACATTCTCCTGCTATTTTAGATGCATTGACTGCA +>34 
+AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTTGAAATCCTGAATGTTCCGCACCACGTCGAAGTGGTTTCCGCACACCGTACCCCGGACAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTACGAGGTGATCATTGCCGGTGCGGGCGGCGCAGCACATCTGCCGGGCATGATTGCCGCCAAAACGCTGGTGCCGGTACTGGGTGTTCCCGTGCAAAGCGCCGCGTTAAGCGGGGTGGATAGCCTTTACTCTATTGTCCAGATGCCGCGCGGTATTCCTGTCGGTACCCTGGCGATTGGTAAAGCAGGTGCGGCAAATGCCGCCCTGCTGGCCGCGCAGATCCTGGCGACGCATGATAAAGATTTGCACCAGCGTCTGGCGGAG +>35 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAACGGCGGAAGAGAACGGATATCAAGTGATTATTGCCGGCGCGGGCGGCGCGGCGCACCTGCCGGGAATGATTGCGGCAAAAACGCTGGTCCCGGTACTCGGCGTGCCGGTACAAAGCGCTGCGCTAAGCGGCGTGGATAGCCTTTACTCCATTGTGCAGATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATCGGTAAAGCCGGTGCGGCTAATGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>36 +AGCGACTGGGCCACCATGCAGCATGCCGCTGAAATTCTTGATGCCCTTGATGTTCCTTACCATGTTGAAGTGGTTTCCGCTCACCGCACGCCTGATAAGCTTTTCAGCTTTGCTGAATCCGCGCAGCACAACGGTTATCAGGTGATTATTGCTGGCGCAGGCGGTGCGGCGCATCTGCCGGGCATGATCGCCGCGAAAACCCTGGTGCCGGTATTAGGCGTGCCGGTGCAAAGCGCGGCCCTGAGCGGCGTGGACAGCCTCTACTCTATCGTGCAAATGCCGCGCGGCATTCCGGTAGGGACGCTGGCGATCGGCAAAGCGGGTGCTGCAAACGCCGCACTGCTGGCGGCGCAGATCCTCGCCCAGCATGACGATGCGCTACTGGCGCGTCTGGCGGCA +>37 +AAACGCTTCCTGAACGAACTGACCGCCGCTGAAGGGCTGGAACGTTATCTGGGCGCCAAATTCCCGGGTGCGAAACGTTTCTCGCTCGAGGGGGGAGATGCGCTGATACCTATGCTGAAAGAGATGGTTCGCCATGCGGGTAACAGCGGCACTCGCGAAGTGGTGCTGGGGATGGCGCACCGCGGTCGTCTGAACGTGCTGATCAACGTACTGGGTAAAAAACCGCAGGATCTGTTCGACGAGTTTGCCGGTAAACATAAAGAACATCTGGGTACCGGCGACGTGAAGTATCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGTCTGGTTCACCTGGCGCTGGCGTTTAACCCATCGCATCTGGAAATTGTGAGCCCGGTGGTGATGGGCTCCGTGCGCGCCCGTCTGGACCGACTGGACGAACCGAGCAGTAATAAAGTGCTGCCGATCACTATTCACGGCGACGCCGCGGTGACCGGCCAGGGCGTGGTTCAG +>38 
+AAACGCTTCCTGAACGAACTGACCGCTGCAGAAGGGCTGGAACGTTATCTGGGGGCAAAATTCCCTGGCGCGAAACGTTTTTCGCTGGAAGGCGGCGATGCGTTAATTCCGATGCTCAAAGAGATGGTCCGCCATGCGGGCAACAGCGGCACCCGCGAAGTGGTGTTGGGAATGGCGCACCGTGGTCGCCTGAACGTACTGGTCAACGTGCTGGGTAAAAAACCTCAGGATCTGTTTGACGAGTTTGCCGGTAAACATAAAGAACATTTGGGCACCGGCGACGTGAAGTACCATATGGGTTTCTCGTCGGATATCGAAACCGAAGGCGGACTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTCAGCCCGGTAGTGATGGGGTCTGTGCGCGCACGTCTCGACCGGCTCGACGAACCGAGCAGCAACAAAGTGTTGCCAATCACCATTCATGGTGATGCAGCAGTTACCGGGCAGGGCGTGGTTCAG +>39 +AAACGCTTCTTAAGCGAACTGACCGCCGCTGAAGGCCTTGAACGTTACCTCGGCGCAAAATTCCCTGGCGCAAAACGCTTCTCGCTGGAAGGCGGTGACGCGTTAATCCCGATGCTTAAAGAGATGATCCGCCACGCTGGCAACAGCGGCACCCGCGAAGTGGTTCTCGGGATGGCGCACCGTGGTCGTCTGAACGTGCTGGTGAACGTGCTGGGTAAAAAACCGCAAGACTTGTTCGACGAGTTCGCCGGTAAACATAAAGAACACCTCGGCACGGGTGACGTGAAATACCACATGGGCTTCTCGTCTGACTTCCAGACCGATGGCGGCCTGGTGCACCTGGCGCTGGCGTTTAACCCGTCTCACCTTGAGATTGTAAGCCCGGTAGTTATCGGTTCTGTTCGTGCCCGTCTGGACAGACTTGATGAGCCGAGCAGCAACAAAGTGCTGCCAATCACCATCCACGGTGACGCCGCAGTGACCGGGCAGGGTGTGGTTCAG +>40 +AAACGCTTCCTCAGCGAACTGACTGCAGCGGAAGGTCTGGAACGCTACCTGGGCGCGAAATTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGTGATGCGTTAATCCCAATGCTCAAAGAGATGATCCGCCACGCCGGTAACAGCGGTACCCGTGAAGTGGTACTGGGTATGGCGCACCGTGGTCGTCTGAACGTCCTGGTTAACGTGCTGGGTAAAAAGCCGCAGGATCTATTCGACGAATTTGCGGGCAAACATAAAGAACACCTCGGTACCGGTGACGTGAAGTACCACATGGGCTTCTCATCGGATATCGAAACCGAAGGCGGTCTGGTGCATCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTTATCGGTTCCGTACGTGCACGCTTGGATCGTCTGGACGAGCCGAGCAGCAATAAAGTGCTGCCAATCACTATTCATGGTGATGCGGCAGTAACCGGGCAAGGCGTGGTTCAG +>41 
+CGTACTTTCCTTGAAGAGCTGACTGCCGCTGAAGGTTTAGAGCGCTATCTTGGTGCGAAATTCCCTGGTGCTAAACGTTTCTCTCTCGAAGGGGGGGATGCCTTAGTTCCGATGACCAAAGAGATGATCCGTCACGCGGGTGCCAGTGGCATGCGTGAAGTGGTGATTGGGATGGCGCACCGCGGTCGCTTGAACATGCTGGTCAACGTTCTGGGTAAAAAACCGCAAGATCTGTTTGATGAGTTTGCCGGTAAACATGGCGAAGGCTGGGGCACAGGTGATGTGAAATATCACCAAGGTTTCTCCGCTGACTTTGCGACACCGGGCGGTGATGTTCACTTAGCACTGGCTTTCAACCCATCGCATCTTGAGATTGTGAACCCTGTTGTGATGGGTTCAGTTCGCGCGCGTCAAGACCGCCTAGGTGATGAAGATGGCAGTAAAGTGCTACCTATCACTATCCATGGTGACTCTGCGATTGCCGGACAAGGTGTGGTGGCT +>42 +AAACGCTTCCTGAGCGAGCTGACCGCAGCCGAAGGCCTTGAGCGCTACCTGGGCGCGAAGTTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGCGACGCGCTGATCCCGATGCTGAAAGAGATGATTCGCCACGCGGGCAACAGCGGCACGCGTGAAGTGGTGCTGGGTATGGCGCACCGCGGTCGTCTTAACGTGCTGGTTAACGTGCTGGGTAAAAAACCGCAGGACCTGTTCGACGAGTTCGCGGGCAAACACAAAGAACACCTTGGCACCGGCGACGTGAAGTACCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGCCTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTAATTGGTTCGGTACGTGCCCGTCTGGATCGGCTGGACGAGCCGAGCAGCAACAAAGTACTGCCGATCACCATTCACGGCGACGCCGCGGTGACCGGTCAGGGCGTGGTTCAG +>43 +GTGCTGGGCCGTAATGGTTCCGACTATTCCGCCGCCGTGCTGGCCGCCTGTTTACGCGCTGACTGCTGTGAAATCTGGACTGACGTCGATGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGACTGCTGAAATCGATGTCCTACCAGGAAGCGATGGAACTCTCTTACTTCGGCGCCAAAGTCCTTCACCCTCGCACCATAACGCCTATCGCCCAGTTCCAGATCCCCTGTCTGATTAAAAATACCGGTAATCCGCAGGCGCCAGGAACGCTGATCGGCGCGTCCAGCGACGATGATAATCTGCCGGTTAAAGGGATCTCTAACCTTAACAACATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGGATGATTGGGATGGCGGCGCGTGTTTTCGCCGCCATGTCTCGCGCCGGGATCTCGGTGGTGCTCATTACCCAGTCCTCCTCTGAGTACAGCATCAGCTTCTGTGTGCCGCAGAGTGACTGC +>44 
+GTGCTGGGGCGTAACGGTTCCGACTATTCCGCTGCGGTACTGGCCGCCTGTTTACGCGCCGACTGTTGCGAAATCTGGACGGACGTTGACGGTGTGTATACCTGCGACCCGCGCCAGGTGCCGGATGCCAGACTGCTGAAGTCAATGTCCTATCAGGAAGCGATGGAACTTTCCTACTTCGGCGCCAAAGTGCTTCACCCGCGTACCATTACTCCCATCGCTCAATTCCAGATCCCATGTCTGATAAAAAATACCGGTAATCCGCAAGCGCCGGGCACGCTGATTGGCGCCAACAGCGATGAAGACGGGCTACCGGTAAAAGGCATCTCGAACCTCAATAATATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGCATGGTCGGGATGGCGGCGCGCGTGTTCGCCACCATGTCGCGTGCCGGGATTTCGGTAGTGCTGATCACCCAATCCTCTTCGGAGTACAGCATCAGCTTCTGCGTGCCGCCAAAGCGATGC +>45 +GTGCTGGGCCGTAACGGCTCCGATTATTCCGCCGCCGTACTGGCCGCCTGTTTACGCGCTGACTGTTGTGAAATCTGGACTGACGTCGACGGCGTGTATACCTGCGACCCGCGTCAGGTGCCAGACGCCAGGCTGCTGAAGTCGATGTCTTATCAGGAAGCAATGGAGCTTTCTTACTTCGGCGCTAAAGTACTACATCCGCGCACTATTACTCCTATTGCCCAGTTCCAGATCCCTTGTCTGATTAAAAATACCGGCAATCCACAAGCGCCCGGTACGCTGATCGGCGCTGCCAGCGACGATGATGCTCTGCCGGTTAAAGGGATTTCTCACCTTAACAACATGGCGATGTTTAGTGTCTCCGGTCCGGGGATGAAAGGCATGGTGGGTATGGCGGCGCGCGTTTTTGCCGCTATGTCACGTGCGGGAATCTCGGTGGTGTTGATCACGCAATCTTCATCTGAATACAGCATCAGCTTCTGCGTGCCGCAGAGCGACTGC +>46 +GTGCTGGGCCGCAACGGTTCTGATTACTCCGCTGCGGTGTTGGCTGCCTGCTTACGCGCCGACTGTTGTGAGATCTGGACTGACGTTGACGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGGTTGCTGAAGTCGATGTCCTATCAGGAGGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTCCTTCATCCTCGCACCATCACCCCCATTGCCCAGTTCCAAATCCCATGCCTGATTAAAAACACCGGAAACCCGCAGGCCCCTGGTACGCTGATCGGCGCCAGCGTGGATGAAGACGAACTGCCGGTGAAAGGGATCTCGAACCTGAACAATATGGCGATGTTCAGCGTTTCCGGCCCAGGAATGAAAGGGATGATCGGGATGGCGGCGCGCGTCTTCGCGGCAATGTCCCGCGCGGGGATCTCCGTGGTGCTGATCACGCAATCCTCTTCTGAATACAGCATCAGTTTCTGCGTACCGCAGGGCGACTGC +>47 
+GTGTTGGGGCGCAATGGCTCTGACTACTCTGCCGCTGTGCTGGCTGCCTGTTTACGCGCGGACTGTTGTGAGATCTGGACCGATGTCGACGGCGTATATACCTGCGATCCGCGCCAGGTACCCGATGCCCGACTGCTGAAGTCGATGTCTTATCAGGAAGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTTCTGCATCCGCGCACCATTACCCCAATTGCCCAGTTCCAGATCCCGTGCCTGATTAAAAATACCGGCAATCCACAAGCGCCTGGCACGTTGATCGGCGCCAGCAGTGATGAAGACGATTTGCCGGTAAAAGGTATTTCTAACCTCAATAACATGGCGATGTTTAGCGTCTCCGGCCCTGGAATGAAAGGCATGGTAGGCATGGCGGCGCGCGTTTTTGCCGCGATGTCGCGTGCGGGCATCTCGGTGGTGCTGATCACGCAGTCTTCTTCTGAATACAGCATCAGCTTCTGCGTTCCGCAGGGCGACTGC +>48 +GTATTAGGTCGCAATGGTTCAGACTACTCAGCTGCAGTATTAGCAGCCTGTTTACGTGCTAAATGCTGTGAAATTTGGACTGATGTTGACGGTGTTTATACTTGTGATCCACGTTTAGTGCCTGATGCACGTTTGTTAAAAGGCATGTCATATCAAGAGGCAATGGAACTGTCTTACTTTGGTGCCAAGGTACTTCATCCTCGTACAATTGCGCCTATTGCCCAATTCCAAATACCTTGTTTAATTAAAAATACGGGCAATCCAGATGCGCCGGGTACCTTGATTGGTGATGGTCAAAAAGATGAGAGCACACCTGTTAAAGGAATAACTAACCTTAATAATATGGCAATGATCAACGTATCTGGGCCTGGAATGAAAGGAATGGTAGGAATGGCGGCTCGCGTGTTCTCGGTAATGTCGAGAGCGGGGATTTCAGTTGTTCTAATCACACAGTCTTCTTCTGAATACAGCATTAGTTTTTGTGTGCCACAAAAAGAGCTG +>49 +GTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGCTGGCTGCCTGTTTACGCGCCGATTGTTGCGAGATTTGGACAGACGTTGACGGGGTCTATACCTGCGACCCGCGTCAGGTGCCCGATGCGAGGTTGTTGAAGTCGATGTCCTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCCCGCACCATTACCCCCATCGCCCAGTTCCAGATCCCTTGCCTGATTAAAAATACCGGAAATCCTCAAGCACCAGGTACGCTCATTGGTGCCAGCCGTGATGAAGACGAATTACCGGTCAAGGGCATTTCCAATCTGAATAACATGGCAATGTTCAGCGTTTCCGGCCCGGGGATGAAAGGAATGGTTGGCATGGCGGCGCGCGTCTTTGCAGCGATGTCACGCGCCCGTATTTCCGTGGTGCTGATTACGCAATCATCTTCCGAATACAGTATCAGTTTCTGCGTTCCACAAAGCGACTGT +>50 
+GTGCTCGGGCGCAACGGCTCCGATTATTCCGCAGCGGTACTGGCAGCGTGTTTACGCGCCGATTGTTGCGAGATCTGGACTGATGTCGATGGTGTCTATACCTGCGACCCACGTCAGGTACCGGATGCCCGATTACTTAAGTCGATGTCGTACCAGGAGGCTATGGAACTCTCCTATTTCGGCGCCAAAGTCCTCCATCCTCGAACCATCACTCCCATCGCCCAGTTCCAGATTCCCTGCCTGATAAAAAATACCGGAAACCCGCAAGCACCAGGAACGCTGATTGGCGCCAGCCGCGACGAAGATGATCTGCCGGTGAAGGGCATTTCAAATCTCAATAATATGGCGATGTTCAGCGTCTCCGGGCCGGGGATGAAGGGAATGGTCGGCATGGCTGCTCGCGTGTTTGCGGCAATGTCTCGCTCAGGAATTTCGGTAGTCCTGATTACGCAATCCTCCTCTGAGTACAGCATTAGCTTCTGTGTACCGCAGGCTGACTGT +>51 +GTGCTGGGGCGTAACGGCTCTGACTACTCCGCCGCCGTGCTGGCGGCCTGCTTACGCGCGGACTGCTGTGAGATCTGGACTGACGTCGACGGCGTTTATACCTGCGATCCGCGCCAGGTACCGGACGCCAGGCTGCTGAAGTCGATGTCGTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCGCGTACCATCTCCCCGATTGCCCAGTTCCAAATCCCTTGCCTGATTAAGAATACCGGTAACCCTCAGGCGCCGGGCACGCTGATTGGCGCCAGCGCGGATGAAGATGAACTGCCGGTGAAAGGCATTTCTAACCTCAATAACATGGCGATGTTCAGCGTCTCCGGCCCGGGGATGAAGGGCATGGTCGGCATGGCGGCACGCGTATTTGCCGCTATGTCCCGCAACGGGATCTCCGTGGTGCTGATCACGCAGTCTTCTTCCGAATACAGCATCAGCTTCTGCGTTCCGCAGGGTGATTGC +>52 +GTATTAGGCCGTAACGGTTCCGACTACTCCGCCGCCGTGCTGGCCGCGTGTTTGCGCGCCGACTGTTGTGAGATCTGGACTGACGTCGACGGCGTCTATACCTGCGACCCGCGCCAGGTGCCGGACGCCAGGCTGCTGAAGTCGATGTCGTATCAGGAAGCCATGGAACTCTCCTACTTCGGCGCTAAAGTTCTCCACCCCCGCACCATTGCCCCCATCGCCCAGTTCCAAATCCCCTGTCTGATCAAAAACACTGGTAACCCGCAAGCGCCAGGCACCCTGATCGGTGCCAGCAGCGATGAAGACGGCCTGCCGGTGAAGGGCATCAGTAACCTGAATAATATGGCGATGTTCAGCGTCTCTGGTCCGGGCATGAAAGGCATGGTGGGAATGGCGGCGCGCGTGTTCGCGGCGATGTCCCGTGCGGGCATCTCGGTGGTGCTGATCACCCAATCGTCTTCTGAATACAGCATCAGCTTCTGCGTGCCGCAGGCCGACAGC diff --git a/locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.ndb b/locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.ndb new file mode 100644 index 0000000000000000000000000000000000000000..dfa7d2e267e27fdcac41d817c56044823912df6b GIT binary patch literal 20480 zcmeI%u}*_f6adgGnn+wQ#(ywwF7Ez{QB0atSk$?{)y4En{D3a@50nD0P>05(Iy5;9 zy!&{M%k5#hB_bVp8+{VEZl~uF`CS|BJbm@Y&P}7mw14ZFqjezx0t5&U 
zAV7cs0RjXF5FoI>z|eR9NB=)$AEK`2tS>!{C;j~AzWslp33bORK$56a26G33lxW~VPj%q;tg;sy%comRr(uEevFOhThS)()6H3s z6>seN|G!V$0yw3B1Wqd;f-?%p;H&~dIH!OV&MP2> z3kt~Lq5^`rq<|zYBhZn=6$K=5RRKv{Q$P~e6_CUY1tf7(0ZD8qAckiNjy+M5)T!S#HIq0c%*}tG zZ^sMi6f4nz(xlPXYgh@Z&oHb@XD#FU)&IfI*Y>wVT&>YxPr&nmV7~6!xPjbe;7A0gSS#c1h+7YcT!^%w=s_o zQs4wev51dSQy0duf=|-XQ@Dd?*pZH%#)SS9@{s-)!w2|D{!^f4PZ_)9Tf|rLTNJwk#BTooH4p9;?y@ZRlXt;Imt1zmfvc{$?uMIgx$TZacinU3 wz6Tz9Z*opT}eelsIpMCMwH{bp6(=WgMapr&Q3tcn?%>V!Z literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.nsq b/locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.nsq new file mode 100644 index 0000000000000000000000000000000000000000..7fc5e3b8e6fafd8419a1bdda10966df4ce2396be GIT binary patch literal 6297 zcmZ9Mk3W-p|Hn6KrJ`R5o!jXgnzNjv&Ns@A(o}>_DwShXLu_kiQ(xyW9GXe`!NxHA zk)0e|rLs<^qAApUQm1oQ;>2k>gtg6b7dlbPee``_*Vg%-$Ndjn&pn^7_h&fU?yf3w z)y>@g_lZg&z0+@j-e8eXQ193s( zCt37>1RIlK%0=ZTQ{iTk#0hMf?Y1WEO+e(QDM92EsP-xy!p*!m9%CkAXH}HyS zDnQM^k(N%2$!BfaKI=wncT_Do{_gI*p>sGHGN5x(vKc%)LRK0R+F#`;{hW>R`#{;VsepQ=fwXjqR%F{|vn;5!`(c&qz}?-0p>s?bvI9KDzLYIY zjBWrrxM%LeO(^@}j`)3MXmc}(7TY2#76IH1`;o{kwn#>E-ZN}=i{q!fggrAM#JEh<0j{B0I+gw}{cx7nn>c1CW_g;XD z7TIpXEyII*Eji)l?l{;zL-q$#k;KYoFkyEkBB>J><5s3c3#Lj3*D5lgyn11GCX}z2 zG<;cg_rwF8hVG7_ba@}p#h5$LiF!sIxC4EN zQHaskxbkKxPXZl26=bl`gWYL2zVN7*D{meAS{kvGRCa(zV+vlsxu;jqY0H>L%qg3P zNJZ5uO|vOknyr0)DLVl^kLcd6dLpT>qqVSrZth^ZcYgrBIRF1%O?UIE04OnSn1nI zhcJ2wxc`klU3ePkJd|F6fIGdXGe&=BD+hH~Tg$TxE#+<1B}zl)D{08|WwQ8`NG1i{ zy{V5rizzVk!iKz$!tSX1J1CzKhQRJ=7WXl5KkQHJ!*;(9^yU*|*zVJXSC(Xy03CL( z0s6ML51#1lfd04L*@yhF-EE8D>iAzvLtN`*+t1TNuD^Z*?kd_WmO$$RyXThz9d+;O z4KY&<1zovVdAPd2=pmbXBT%}f{3D=$oMCZqL|U68qTeQT=Vmwu-KPv%iw!@9}5YY!4NxOSIe$xI?yRovV3!jW_SQsllP#(6je>&y{Z-=uku zb{ezgpnk(f)fP{WWQX$jw%hI6tyLw#C8iT!#@JOY5vMyzicZ`A4MyRy3dfo@APs~2 z-&Q*fAWY5RrGqFeL2keGJ*-QncHyL4i{uDKBsf_LYSwCw?eJtY#OgzK{v+(jaGZOE zk|A%`it9^4I(zfGD*^Wivt`r9MDiua 
z66B`&TI9yzoXKFKOk}oEutE;G^j(FH(=0UiAkXhJ-77THU2ki*)LA>;sFt;}{Gj9C z)o^=R?PF(x5Trlr7m8EI5~jTWQmnP(Ru_|ECPG$~^eT8R2_u&x*#cuRPO!;T=+c*N z{;+r2)2mw<5s589o@e&EmvxMnwrksKFSJ!^=^LTr3Ng!X-g}sH-p$$QXq(#2iW`Wu z|4mP)>wqs$6Mqfsn5Yp#N5xDpvP#mcNS+sy@&j}P825=pC#1*Au8&Am1$pYP zsVYKtQQJ|o5V`5wFR4dQJ{n1Jy7nFh7cxDNX2WD{H7kPD(zuLgaju3qru?HTDKCzn z=wxXGoBYAd6U@pL?Q>Y_OXz6xaDhqWQ1A0umD24ySR&g!_u0i+O}ud1gM=QooFtZ} z*G@?GcDD(dR?M9$XA@%o?QFN6>m3%>7>}^;73!DPar-sHMbZWA;^68D?dyEwH{tw} z+I{2UIy#?Gty-v0^hS17O$2~%UfF+F_W z{Mvo5!+)jocU4O-8bTK*9*c=%EGHgsQ%dj%m$Zf$8N+{k*yh8fC_Ow2qjzmjWr%G1 z?q^8S70KZNt%SFAC3fvnR?U>8zgsWB=p2XG7tVGRuJ`e<#wvumrBLr#kGhM&y^K*k zVNNf&ofaNFzczY&J?1XGSiAJBQMh>t!|SPB@2bKx*`#Zjdob%O!v-ZjFbSn+RXS4> z^dgVgTWoND5x%$;+)Kcng-l&og3;|KYP|`u;Ev;Z9|gLaT~w$~tmU2rx(dnz-Sk?F zmIwFraNU3S43=so!c{I6X2mhQYI5~56`sK$xf3F({Ko}Y`P@L-WT>|~z4Ev12@H`3 z{T5q@mQP5vdB%a=`@TXZazA}I{?CSa_EY6`4!e2IBq3J53So<({9o=TV0Tk`z7gC@ zYWZmx-9x&t2Cu9;95cxEI+d?iDA4jJx`94f%UnIMxin}M&EhUa-EH^Wq86t&@oZ+d z5}uhX?wKXu|2~duSYba^TSwRp<=12M0fcQ|sAp7h<1P;0L$8ieJ*E97-}oA(hXNh0 zuIhxB{+O~>7}L-2VjO)=mCk2!NtNA^F{6)(Hmi}%%G_1f@>cqNwsKN&F3dYM{29BF zv_%2sOZGaqQJUsK`8o%;tnL7Q+t>* z$eqDJZyxW(AgL5K;EpiSo6jY>pK^SIVbi$jg6zfUe8GdXKHrun8VHFhTHex}@r z$Zh+|b|w%um4;Pl3yAIoR)%e#2pB*6v{HA(?a~2bBATF>nOp74d(V(!j6e3&AJksAqmfo%@vF(+8onYfNKddqD6vk+k zM%z!Uq!Y&HqYUbNVA!D^zz|wIROlX-R%c+}X{DdL#Q z+t8{{wlTqGkvymodG7%XTh(FM@{qpk?~k5Dv+MinE46{!y% z&m&X3{jxq;bO%~3Q*ASR*G{aQ{U7Dc*Ywo?h_)7#TK9;ypMpKcOG4mwsq>R}RbiWB zd$_25%^Zayu>|!%H-}d0aUMm^995{{u700{R-Cn294B-SZv{nPZmgAi&}E4_n{_2& zjmU9~0q?_drasm)U1Uvv8O~I1c?vx2Of4q_S#+*ok2IbSHmQ`OJGHA%1`b9w>FT-- z4DL{Lri-)Z57#yNq7PoN4^!37Wpz!jaxHXKC`PZtjb}3PtrMBE*rJrV(MHOVVQNHD z5V3?R9Bw&jD&aNI3AgJBMxb{VjSN`mTb~63y=~>$*WBt=_&B7%LXXleI=QMd>POv~ zK0_k+5iJeq#gwahd&gJ9qOj%8H5U4}MAV&>Ll{ew13eM{U0UWW&fygMXyf%GI%-I! 
zh29J9Jk(u{(TfYfU5mO0_k^ey3))ckf$@(LkP0>C9@V>OFz}yIKmNh#o6vKILUa73 zK-U2MA?GmF$GPS#=DvqGqd>CHJCkrAQ!W@!Vp`~;6#H%94s_ZL;)NLDu=_a+{kQWd zeW|3VJwDG|htb>A;12XHn0upo3tAp?e;L*E2dDeiQ`XSqOc$U-dHWB*o&BkE4GHMU z2V;d^1ya8EOJZt{Mlhaf#J8rW%{n8BT^>CE^q7z&y-%r9I4s6?&zWYdC%lI8#YH1J zwEP!wr_O9t2MF#tdcUR)OvO7RS4KA7asTcC>^>OS|5c0bZ07`%JCpjqSNcVg>zcNE zA3Pi`$Ph~pc{iDE!0tq^d`UDZhY0S%_wMY&tfFYRI-NEI-5uyEaA!6=As8|D>5_c3 zd>ww?lHj-4?tfif#ou-Z_n(G)ZbA7T%$;&UU*u@Hx=ZNpnrAT%&GGG2hQ`Dgxy33_ z^{=iPX!wG6mp(u(Q`1~SGx}Er?2Z&BoJ{{V8`<_*wv*QJM#ZX~&7oac{83xyAG*UC zpSbGS%E)-piay%tveo-zr!7PRUUIw3Py5dj1y&>StA9#&G%{&LJ;Rl2c3%FCf5Ede z`-NWO;~u1WrX-+r!v&PiC}@dWT~*cM_Tnx*9;K6kekovgC(sY>2D(qSlhSdlYt`3l zL%Ra_*HQXJ-RbvaA4)gq z%j`rzk0DehfICKK6|`XPZr~nIy`+wG4bSLr2>7;BxZ(SQ;J*LU1Ag)k#(oP#-6v7^ z;)m-Q9B@~GI~jFf_qE0SdH3Zr{<9*QE&~0XH1!fM$-ZZ}#W!*Ful&z3x`c?jQwhzw z+f>6ZU*}9&SMD}9m@M@9Xoabozg|-kK&yLQQ^CWfnKJp7P`38 z`j@SaqfZ0(&Xn%*PoVrN)ID3V;W9I+sC!uDo9OohrT=MYlOXOvSD(opDhF<7urwz9 z$So#DPiSyUzDrM_mYs_tM_b*k^yL|Ta>ucqn7ih^yJ<1dB|4$Y6z#hAy8V0RSa%-+ z_a93!_k;#s(v}{S-iXn&4dW>N;xAf<=8tZNJ-v8m>Budkt*U=rm9E8Y4rs@aH5Yl(nmj8 zu|Ias${PO%$$hmzcla?6sZ=)i{M7JqlDKp7ci)W|{pGtC`gt~9xth1GaCNsc446AZ z+_HCF)kiIE^_JbE$j2tp@^JItqgS_odA1YPacmcM^Jdgt@Z0)eZ`WUk%cS7GhW4Ot z-L}3t3v00Qu)9MG>V8m?aETe!4&~P*`aX&N{8K1DK9A^rL(?P2u8w%YM1-3M`sylO z!xztizWvfU8aaAwv>_ll@>uH0^xc2Li~Gj!kz+>}N*~WaZ(b0!J{Vq{sZ1I#^zWnf z*R7-UqBq~k?XErJzs#Yb^i-v?Rk5Lw6+XA;U-UJJe&~yPnf;pcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.nto b/locidex/example/manifest_in/passes/pass_single/pass_one_db/blast/nucleotide/nucleotide.nto new file mode 100644 index 
0000000000000000000000000000000000000000..ad19396e81aff427697a109c3c035ac73cb27f3f GIT binary patch literal 216 zcmXBFg${xM06;;oyHK$M6YTo`U-LHJc6+}dFSG(hN|dQkrAD0wO0 +KFRPGHADYTYHQKYGVRDYRGGGRSSARETAMRVAAGAIAKKYLQQEFGIEVRAYLSQMGDVAIDKVDWNEIENNDFFCPDVDKVAAFDELIRELKKEGDSIGAKIQVVATGVPVGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVRQKGSQHRDPLTPQG +>1 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKHGIVIQGCLTQMGDIPLEIKDWQQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVANGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>2 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGVEIRGCLTQMGDIPLEIKDWSQVELNPFFCPDPDKIEVLDELMRGLKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>3 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLKEKLGIEVRGYLSQLGPITCDLVDWSIVESNPFFCPDPSRLDALDEYMRALKKEGNSIGAKVTVVAQGVPAGFGEPVFDRLDADLAHALMSINAVKGIEIGDGFGVVTLKGTENRDEITKKG +>4 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGIEIRGCLTQMGDIPLEIKDWSQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKDG +>5 +VFRPGHADYTYEQKYGFRDYRGGGRSSARETAMRVAAGAIAKKYLQQKFGIVIRGCLSQMGDIPLAIKDWDQVELNPFFCADADKLDALDELMRGLKKEGDSIGAKVTVVADGVPAGWGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVKLRGSQNRDEITKAG +>6 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAQKFGVVIRGCLTQMGDIPLEIKDWDQVEQNPFFCPDPDKIEALDELMRALKKEGDSIGAKVTVVADSVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFGVVQLRGSQNRDEITTAG +>7 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>8 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>9 +MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>10 
+MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQPLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>11 +MEMVARVALVQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGERMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGQSLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>12 +MEMIARVTLTQPHDAGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGDSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>13 +MEMIARVALSLPHQAGATTVPARKFFDICRGLPEGAEIAVTLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCAMPVGQPLPNHSVIVPRKGVLELMRMLDGGDSPLRIQ +>14 +SALTENDLVFALSQHAVTFADAELQQQGKSWPSLPRYFAIGRTTALALHTVSGFNIHYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELIGETLTARGADVDFCECYQRSAKYYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>15 +AALGESDLLFALSQHAVAFAQSQLHQQDRKWPRLPTYFAIGRTTALALHTVSGQKILYPQDREISEVLLQLPELQNIAGKRALILRGNGGRELIGDTLTARGAEVTFCECYQRCAIHYDGAEEAMRWQSREVTTVVVTSGEMLQ +>16 +ATLTENDLVFALSQHAVAFAHAQLQRDGRNWPASPRYFAIGRTTALALHTVSGFDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRGRELLGETLTARGAEVSFCECYQRSAKHYDGAEEAMRWHTRGVTTLVVTSGEMLQ +>17 +AALTDNDLVFALSQHAVAFAHAQLQQQELDWPVQPRYFAIGRTTALALHTVNGCDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELLGKTLTERGAEVTFCECYQRSAKHYDGAEEAMRWHSRGVTTIVVTSGEMLQ +>18 +ETLGDNDLLFALSQHAVSFAHAQLQQQGLNWPSLPHYFAIGRTTALALHTVSGHKIRYPQDREISEVLLQLPELQSIAGKRALILRGNGGRELIGQTLTSRGADVTFCECYQRSAKHYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>19 +RLLQEGDLLFALSQHAVEFAHAQLQQHAVSWPHAPRYFAIGRTTALALHTASGIDVRYPLDREISEVLLQLPELQTIAGKRALILRGNGGRELLGETLRERGADVTFVECYQRCAKHYDGAEEAMRWHARGINTLVVTSGEMLQ +>20 +IAGCQKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADIARKVAEAVERQLAELPRAGTARQALSASRLIVTKDLAQCV +>21 +IAGCKKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGIAQNVAEAVERQLAELPRAETARQALSASRLIVTKDLAQCV +>22 
+IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGMASRVAEAVERQLAALPRAETARQALSASRLIVTRSLAQCV +>23 +IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGTESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPAADMARRVAEAVERQLAELPRAETARQALNASRLIVTKDLAQCV +>24 +IAGCQKVVLCSPPPIADEILYAAKLCGVQAIYKVGGAQAISALAFGTVSIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADMAKRVGDAVERQLADLPRAETARQALSASRLIVARDLDQCI +>25 +IAGCKKVVLCSPPPIADEILYAAQLCGVKEVFNVGGAQAIAALALGTESIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAKLAEGVAEAVERQLAELSRADTARQALSASRLIVAKDLAQCV +>26 +IAGCKKVVLCSPPPIADEILYAARLCGVQQVYQVGGAQAIAALAFGTETVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATTDFVASDLLSQAEHGPDSQVILLTPDSAMAQAVADAVERQLAELPRAETARQALAESRLIVARDLAQCV +>27 +SDWATMQFAAEIFDILDIPHHVEVVSAHRTPDKLFSFAENAEENGFQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLRD +>28 +SDWTTMQFAAEIFEILDVPHHVEVVSAHRTPDKLFSFAETAEENGYHVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLSN +>29 +SDWATMQFAAEILDILNVPHHVEVVSAHRTPDKLFSFAEDAESNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>30 +SDWATMQFAVEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKELHQRLNG +>31 +SDWATMQFAAEIFDILNVPHHVEVVSAHRTPDKLFSFAESAEEKGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLRQRLAD +>32 +SDWATMQFAAEIFEMLDVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKALHQRLSD +>33 +SDWATMSHAADVLDTLQIPYHVEIVSAHRTPDKLFSFAEKAKSNGFDVIIAGAGGAAHLPGMLAAKTLVPVFGVPVQSATLSGVDSLYSIVQMPKGIPVGTLAIGKAGAANAALLAAQVLALHSPAILDALTA +>34 +SDWATMQFAAEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYEVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLHQRLAE +>35 
+SDWATMQFAAETAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>36 +SDWATMQHAAEILDALDVPYHVEVVSAHRTPDKLFSFAESAQHNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDDALLARLAA +>37 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLINVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>38 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>39 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDFQTDGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>40 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>41 +RTFLEELTAAEGLERYLGAKFPGAKRFSLEGGDALVPMTKEMIRHAGASGMREVVIGMAHRGRLNMLVNVLGKKPQDLFDEFAGKHGEGWGTGDVKYHQGFSADFATPGGDVHLALAFNPSHLEIVNPVVMGSVRARQDRLGDEDGSKVLPITIHGDSAIAGQGVVA +>42 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>43 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDDDNLPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>44 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGANSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFATMSRAGISVVLITQSSSEYSISFCVPPKRC +>45 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGAASDDDALPVKGISHLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>46 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASVDEDELPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>47 
+VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>48 +VLGRNGSDYSAAVLAACLRAKCCEIWTDVDGVYTCDPRLVPDARLLKGMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPDAPGTLIGDGQKDESTPVKGITNLNNMAMINVSGPGMKGMVGMAARVFSVMSRAGISVVLITQSSSEYSISFCVPQKEL +>49 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDC +>50 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRSGISVVLITQSSSEYSISFCVPQADC +>51 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTISPIAQFQIPCLIKNTGNPQAPGTLIGASADEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRNGISVVLITQSSSEYSISFCVPQGDC +>52 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQADS diff --git a/locidex/example/manifest_in/passes/pass_one_db/config.json b/locidex/example/manifest_in/passes/pass_single/pass_one_db/config.json similarity index 100% rename from locidex/example/manifest_in/passes/pass_one_db/config.json rename to locidex/example/manifest_in/passes/pass_single/pass_one_db/config.json diff --git a/locidex/example/manifest_in/passes/pass_single/pass_one_db/meta.json b/locidex/example/manifest_in/passes/pass_single/pass_one_db/meta.json new file mode 100644 index 0000000..f3b88fa --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_single/pass_one_db/meta.json @@ -0,0 +1,1181 @@ +{ + "info": { + "num_seqs": 53, + "is_cds": "True", + "trans_table": 11, + "dna_min_len": 220, + "dna_max_len": 350, + "dna_min_ident": 80, + "aa_min_len": 73, + "aa_max_len": 116, + "aa_min_ident": 80 + }, + "meta": { + "0": { + "seq_id": 0, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate 
synthase", + "locus_description": NaN, + "locus_uid": 609, + "dna_seq_len": 501, + "dna_seq_hash": "4811bc98591c74954ace3cb487330482", + "aa_seq_len": 167, + "aa_seq_hash": "a8fbcf8179d8548f980b7b15f29de1d4", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "1": { + "seq_id": 1, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 614, + "dna_seq_len": 501, + "dna_seq_hash": "b66979eaf680fab872ffe1bde4c092d6", + "aa_seq_len": 167, + "aa_seq_hash": "3e034a4d80ac27352822774abd9319df", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "2": { + "seq_id": 2, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 618, + "dna_seq_len": 501, + "dna_seq_hash": "f02a36ff6df05f9bf38428fa22a035da", + "aa_seq_len": 167, + "aa_seq_hash": "e2d30bb18231528ef65c34880704dd7a", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "3": { + "seq_id": 3, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 619, + "dna_seq_len": 501, + "dna_seq_hash": "bee9d7360aa8e9b840fb29afa1de2c2e", + "aa_seq_len": 167, + "aa_seq_hash": "c3f71f5780b5f1031aaf21697a482ee3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + 
"aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "4": { + "seq_id": 4, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 620, + "dna_seq_len": 501, + "dna_seq_hash": "5b7956485455fdbc7c86d4834a8f7406", + "aa_seq_len": 167, + "aa_seq_hash": "60ce8f3b07f53378580ee528910ee623", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "5": { + "seq_id": 5, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 624, + "dna_seq_len": 501, + "dna_seq_hash": "98ba14aac74444a253123aff3d20c69f", + "aa_seq_len": 167, + "aa_seq_hash": "bab41702c7c209def93f9c9930c27086", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "6": { + "seq_id": 6, + "locus_name": "aroC", + "locus_name_alt": "SALM25359", + "locus_product": "chorismate synthase", + "locus_description": NaN, + "locus_uid": 716, + "dna_seq_len": 501, + "dna_seq_hash": "6b9166d5d996897cae3cc288d7969d78", + "aa_seq_len": 167, + "aa_seq_hash": "5bc86c0a9226224922cbd6219c182622", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "7": { + "seq_id": 7, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, 
+ "dna_seq_hash": "d401763f2df6e5fe87e1e07d3c170fe6", + "aa_seq_len": 167, + "aa_seq_hash": "928ad814483bbffda3e3b3a0aa4ca072", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "8": { + "seq_id": 8, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 120, + "dna_seq_len": 501, + "dna_seq_hash": "9c50d73cc4ef8d0a447f07ad150ad8cc", + "aa_seq_len": 167, + "aa_seq_hash": "928ad814483bbffda3e3b3a0aa4ca072", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "9": { + "seq_id": 9, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 555, + "dna_seq_len": 501, + "dna_seq_hash": "fab4f658dfba0cd0174a4a87998cf948", + "aa_seq_len": 167, + "aa_seq_hash": "a081905e659429db1f40e145932ae277", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "10": { + "seq_id": 10, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 557, + "dna_seq_len": 501, + "dna_seq_hash": "acb2ed027124e2a54b7734cd538590f1", + "aa_seq_len": 167, + "aa_seq_hash": "970184ec5ccc9f02ee3c858d2687cc18", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + 
"min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "11": { + "seq_id": 11, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 558, + "dna_seq_len": 501, + "dna_seq_hash": "ad996a122298d55ab3d4b2ea7a4974b0", + "aa_seq_len": 167, + "aa_seq_hash": "945455021fffea9b793d16af630db961", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "12": { + "seq_id": 12, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 563, + "dna_seq_len": 501, + "dna_seq_hash": "815242e67f31f4e2968f7f0620565125", + "aa_seq_len": 167, + "aa_seq_hash": "1b117ca76a022ae63d6f7bfe2ead289e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "13": { + "seq_id": 13, + "locus_name": "dnaN", + "locus_name_alt": "SALM25360", + "locus_product": "DNA polymerase III subunit beta", + "locus_description": NaN, + "locus_uid": 633, + "dna_seq_len": 501, + "dna_seq_hash": "532742ae95c046241789d79e68e30b7a", + "aa_seq_len": 167, + "aa_seq_hash": "fff51d2396f3da88a775416b4c6d14b6", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "14": { + "seq_id": 14, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 316, + "dna_seq_len": 432, + 
"dna_seq_hash": "3922f6256f2891400db415013eb0b208", + "aa_seq_len": 144, + "aa_seq_hash": "0af9d546dfcaf93373a8919df3e30323", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "15": { + "seq_id": 15, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 343, + "dna_seq_len": 432, + "dna_seq_hash": "f76c13e33ad5b502dfe64181dbdf2378", + "aa_seq_len": 144, + "aa_seq_hash": "32484f065f9013aaa5b3c694cc99cdbf", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "16": { + "seq_id": 16, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 472, + "dna_seq_len": 438, + "dna_seq_hash": "80bea3abd165ee14e51bc9e9779fc6a1", + "aa_seq_len": 146, + "aa_seq_hash": "4e9cc2d289f1c946738cc8e6e4ef1186", + "dna_min_len": 306, + "dna_max_len": 744, + "aa_min_len": 102, + "aa_max_len": 248, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "17": { + "seq_id": 17, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 489, + "dna_seq_len": 432, + "dna_seq_hash": "83a314185d9ff0bf7c2953d30979e7eb", + "aa_seq_len": 144, + "aa_seq_hash": "5f9fc3707789543f2f14b0f1a555a05c", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + 
"min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "18": { + "seq_id": 18, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 497, + "dna_seq_len": 432, + "dna_seq_hash": "c70622b317de74bdaf57eb8bb5134537", + "aa_seq_len": 144, + "aa_seq_hash": "56b3d46d3e517eb7f83f089f9ed5ae2a", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "19": { + "seq_id": 19, + "locus_name": "hemD", + "locus_name_alt": "SALM25361", + "locus_product": "uroporphyrinogen-III synthase", + "locus_description": NaN, + "locus_uid": 498, + "dna_seq_len": 432, + "dna_seq_hash": "f284b11b34de688e2ef54c1b73936595", + "aa_seq_len": 144, + "aa_seq_hash": "da558cdebd900031d0df8f58ef01454e", + "dna_min_len": 302, + "dna_max_len": 734, + "aa_min_len": 100, + "aa_max_len": 244, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "20": { + "seq_id": 20, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "9f762c246c542c52c94c5022ca62311c", + "aa_seq_len": 167, + "aa_seq_hash": "447381a0d286fa1037b5499e2242819a", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "21": { + "seq_id": 21, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 754, + "dna_seq_len": 501, + "dna_seq_hash": 
"65b434bea0d1939d2b748dbc5dd6df8b", + "aa_seq_len": 167, + "aa_seq_hash": "2b685aa7892794b69c9faa20c58a9183", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "22": { + "seq_id": 22, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 757, + "dna_seq_len": 501, + "dna_seq_hash": "eccfc35078428e44e5dd3e85d9ebf1fe", + "aa_seq_len": 167, + "aa_seq_hash": "35fa89ee4cd8689b89d553157471afe0", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "23": { + "seq_id": 23, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 759, + "dna_seq_len": 501, + "dna_seq_hash": "ce01d780cd0ffe3197f708d7048a473b", + "aa_seq_len": 167, + "aa_seq_hash": "bc0edd26ea6032cc4939e8cbc17a12d3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "24": { + "seq_id": 24, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 768, + "dna_seq_len": 501, + "dna_seq_hash": "23377e95fe00bf6a16b51fe8929a938a", + "aa_seq_len": 167, + "aa_seq_hash": "9fb34628ef67396ed38c755280e04f7e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "locus_type": "mlst" + }, + "25": { + "seq_id": 25, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 838, + "dna_seq_len": 501, + "dna_seq_hash": "8478cdd016753651cd73afc4ad20c7df", + "aa_seq_len": 167, + "aa_seq_hash": "6512669779521a6792ecdae3088467f7", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "26": { + "seq_id": 26, + "locus_name": "hisD", + "locus_name_alt": "SALM25362", + "locus_product": "histidinol dehydrogenase", + "locus_description": NaN, + "locus_uid": 907, + "dna_seq_len": 501, + "dna_seq_hash": "ab935d39fffeff601d95a8362ba454f3", + "aa_seq_len": 167, + "aa_seq_hash": "1c277aef51e883e29ee8b489c525ea1b", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "27": { + "seq_id": 27, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 24, + "dna_seq_len": 399, + "dna_seq_hash": "a7af783dc7084f1b8bc593aa29f80003", + "aa_seq_len": 133, + "aa_seq_hash": "46a0c532edb92303b1b9d12a80056a60", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "28": { + "seq_id": 28, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 48, + 
"dna_seq_len": 399, + "dna_seq_hash": "9fb313e6232b0d0e14d2fc4be7c409f7", + "aa_seq_len": 133, + "aa_seq_hash": "0e56efdd1f7fbaf132524616e29d98ca", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "29": { + "seq_id": 29, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 317, + "dna_seq_len": 399, + "dna_seq_hash": "50cd750e2f6860dd489040f1d5f64f9b", + "aa_seq_len": 133, + "aa_seq_hash": "18e887a66ce56a930dbf8db48b406596", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "30": { + "seq_id": 30, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 487, + "dna_seq_len": 399, + "dna_seq_hash": "0e1384e36f3897f65690f9230d2bcd73", + "aa_seq_len": 133, + "aa_seq_hash": "20c9a488aa6542257a151ced866d2f8f", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "31": { + "seq_id": 31, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 608, + "dna_seq_len": 399, + "dna_seq_hash": "e180fd1852382c132851674a9e379c03", + "aa_seq_len": 133, + "aa_seq_hash": "c7da76b50946241fe125348a19a9b6a3", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 
93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "32": { + "seq_id": 32, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 611, + "dna_seq_len": 399, + "dna_seq_hash": "0ec842f985e93041c928ab7bb137295d", + "aa_seq_len": 133, + "aa_seq_hash": "be3990f2abaa8780b14e62d4fc8cd82a", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "33": { + "seq_id": 33, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 612, + "dna_seq_len": 399, + "dna_seq_hash": "9d42e484ea2936f87312f07abf0ad84a", + "aa_seq_len": 133, + "aa_seq_hash": "7af624e3930c7a5ab7785b08d925081c", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "34": { + "seq_id": 34, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 619, + "dna_seq_len": 399, + "dna_seq_hash": "02949c6f858f3cc5de1b13c9f5a40705", + "aa_seq_len": 133, + "aa_seq_hash": "52d120d4090a22e450633e01e4ccb729", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "35": { + "seq_id": 35, + 
"locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 631, + "dna_seq_len": 315, + "dna_seq_hash": "c4715d7df9a9eebfe5a334dd55ee469b", + "aa_seq_len": 105, + "aa_seq_hash": "31aa38918b303bf67374188e11413e59", + "dna_min_len": 220, + "dna_max_len": 535, + "aa_min_len": 73, + "aa_max_len": 178, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "36": { + "seq_id": 36, + "locus_name": "purE", + "locus_name_alt": "SALM25363", + "locus_product": "phosphoribosylaminoimidazole carboxylase catalytic subunit", + "locus_description": NaN, + "locus_uid": 724, + "dna_seq_len": 399, + "dna_seq_hash": "782d08e7ee8a031a1402020e708bfbbc", + "aa_seq_len": 133, + "aa_seq_hash": "b5f9063808b8be839e7f169bf73c88e4", + "dna_min_len": 279, + "dna_max_len": 678, + "aa_min_len": 93, + "aa_max_len": 226, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "37": { + "seq_id": 37, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "481b6454f33fae7875b4978c14094ec3", + "aa_seq_len": 167, + "aa_seq_hash": "fa04457773c66ae015014e915af2516d", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "38": { + "seq_id": 38, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 30, + "dna_seq_len": 501, + "dna_seq_hash": 
"79048d21794195277a6af839be13e6e1", + "aa_seq_len": 167, + "aa_seq_hash": "186c53cb5c2bf0b7ecac853c6067065d", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "39": { + "seq_id": 39, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 281, + "dna_seq_len": 501, + "dna_seq_hash": "f10d273aa97d5556a43b96721d666975", + "aa_seq_len": 167, + "aa_seq_hash": "4172d5e8c8265884fe5479e10527cb02", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "40": { + "seq_id": 40, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 399, + "dna_seq_len": 501, + "dna_seq_hash": "1839775cc7c29412648ec7b004e1a417", + "aa_seq_len": 167, + "aa_seq_hash": "c4cfbbf5c5814829188f4f404f312bd3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "41": { + "seq_id": 41, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 571, + "dna_seq_len": 501, + "dna_seq_hash": "fce3e68952108e415579b3ad24a3f150", + "aa_seq_len": 167, + "aa_seq_hash": "43372b6526524f5ed4542be83b5b8614", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + 
"min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "42": { + "seq_id": 42, + "locus_name": "sucA", + "locus_name_alt": "SALM25364", + "locus_product": "2-oxoglutarate dehydrogenase E1 component", + "locus_description": NaN, + "locus_uid": 686, + "dna_seq_len": 501, + "dna_seq_hash": "629ea0cbfe0d2e9f34b1ca034a6c55fd", + "aa_seq_len": 167, + "aa_seq_hash": "c4cfbbf5c5814829188f4f404f312bd3", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "43": { + "seq_id": 43, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 1, + "dna_seq_len": 501, + "dna_seq_hash": "eaec644b411bd0b3ab1e086fbabd29c9", + "aa_seq_len": 167, + "aa_seq_hash": "bfe756f2f421db752907a171f3a44d69", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "44": { + "seq_id": 44, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 31, + "dna_seq_len": 501, + "dna_seq_hash": "97e4acce4e840b1c48de51f55fccf620", + "aa_seq_len": 167, + "aa_seq_hash": "be9296cb1ea9443fb43c0f967d107988", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "45": { + "seq_id": 45, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate 
kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 208, + "dna_seq_len": 501, + "dna_seq_hash": "fbc6cb34cddfb1fe6a7806d5f7613259", + "aa_seq_len": 167, + "aa_seq_hash": "b788ec581475c9ba71d997b2db6e1def", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "46": { + "seq_id": 46, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 630, + "dna_seq_len": 501, + "dna_seq_hash": "ce58c0cacd4e8d9fa4867d11f2add864", + "aa_seq_len": 167, + "aa_seq_hash": "c062c5c88bdebdf2883e06fe6823c71c", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "47": { + "seq_id": 47, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 631, + "dna_seq_len": 501, + "dna_seq_hash": "949426df5430f94547459d06c786d77b", + "aa_seq_len": 167, + "aa_seq_hash": "dac50e2b5df83fe87c9826ecf99d568e", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "48": { + "seq_id": 48, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 632, + "dna_seq_len": 501, + "dna_seq_hash": "9a187a6b3e4675fe12ea213c7a23577c", + "aa_seq_len": 167, + "aa_seq_hash": 
"6536824faaa7880cfb44a6cd1ed057c9", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "49": { + "seq_id": 49, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 633, + "dna_seq_len": 501, + "dna_seq_hash": "7be8b9732228c1f82630b547d7011a5e", + "aa_seq_len": 167, + "aa_seq_hash": "1eac2cb94b8f619df1c9b0f3369f4a96", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "50": { + "seq_id": 50, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 637, + "dna_seq_len": 501, + "dna_seq_hash": "1895acdf991b49a885873fe82ce9ca85", + "aa_seq_len": 167, + "aa_seq_hash": "9fe9521d0bf495570a0fd425c0e48764", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "51": { + "seq_id": 51, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 638, + "dna_seq_len": 501, + "dna_seq_hash": "9776bbec78b5214d3dfca0d32b395d4b", + "aa_seq_len": 167, + "aa_seq_hash": "2914d167cc3579348e36d16afc628a39", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + 
"min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + }, + "52": { + "seq_id": 52, + "locus_name": "thrA", + "locus_name_alt": "SALM25365", + "locus_product": "bifunctional aspartate kinase/homoserine dehydrogenase I", + "locus_description": NaN, + "locus_uid": 748, + "dna_seq_len": 501, + "dna_seq_hash": "6cf9d69644c819d9ecd3a0fd090977fc", + "aa_seq_len": 167, + "aa_seq_hash": "cf0168a601a4f5792c7326a2da650edb", + "dna_min_len": 350, + "dna_max_len": 851, + "aa_min_len": 116, + "aa_max_len": 283, + "dna_min_ident": 80, + "aa_min_ident": 80, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "locus_type": "mlst" + } + } +} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/pass_single/pass_one_db/results.json b/locidex/example/manifest_in/passes/pass_single/pass_one_db/results.json new file mode 100644 index 0000000..5252454 --- /dev/null +++ b/locidex/example/manifest_in/passes/pass_single/pass_one_db/results.json @@ -0,0 +1,14 @@ +{ + "analysis_start_time": "2024-04-04 14:12:12", + "parameters": { + "input_file": "locidex/example/build_db_mlst_in/senterica.mlst.txt", + "outdir": "/tmp/pytest-of-mwells/pytest-82/build0", + "name": "Locidex Database", + "db_ver": "1.0.0", + "db_desc": "", + "author": "", + "date": "", + "force": true + }, + "analysis_end_time": "2024-04-04 14:12:12" +} \ No newline at end of file diff --git a/locidex/example/manifest_in/passes/run.json b/locidex/example/manifest_in/passes/run.json deleted file mode 100644 index 41cee63..0000000 --- a/locidex/example/manifest_in/passes/run.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "analysis_start_time": "25/04/2024 14:12:06", - "parameters": { - "input": "./locidex/example/manifest_in/passes/" - }, - "analysis_end_time": "25/04/2024 14:12:06" -} \ No newline at end of file diff --git a/locidex/manifest.py b/locidex/manifest.py index ccaa704..556cc1b 100644 --- a/locidex/manifest.py +++ b/locidex/manifest.py @@ -1,103 +1,167 @@ 
import pathlib import json +from typing import List, Union, Tuple, Dict import os import re import sys from argparse import (ArgumentParser, ArgumentDefaultsHelpFormatter, RawDescriptionHelpFormatter) from datetime import datetime from locidex.version import __version__ -from locidex.constants import DBConfig +from locidex.constants import DBConfig, DBFiles, ManifestFields def add_args(parser=None): if parser is None: parser = ArgumentParser( description="Locidex manifest: Setup directory of databases for use with search") - parser.add_argument('-i','--input', type=str, required=True,help='Input directory of locidex databases') + parser.add_argument('-i','--input', type=str, required=True,help='Input directory containing multiplie locidex databases') parser.add_argument('-V', '--version', action='version', version="%(prog)s " + __version__) return parser -def run_merge(config): - analysis_parameters = config - - #Input Parameters - input_dir = config['input'] - in_dirname = input_dir.split('/')[-1] - - run_data = {} - run_data['analysis_start_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") - run_data['parameters'] = analysis_parameters - - db_keys = DBConfig.keys() - - d = pathlib.Path(input_dir).rglob('*') - config_files = {} - for item in d: - if item.is_dir(): - continue - fpath = item.resolve() - dirname = os.path.dirname(fpath).split('/')[-1] - fname = os.path.basename(item) - if fname != 'config.json': - continue - c = {} - with open(fpath ,'r') as fh: - c = json.load(fh) - if len(c) == 0: - continue - for field in db_keys: - if not field in c: - print(f'Error db config: {fpath} is missing a needed field key for {field}, please set one', file=sys.stderr) - raise KeyError - - v = c[field] - if v == '': - print(f'Error db config: {fpath} is missing a needed field value for {field}, please set one', file=sys.stderr) - raise KeyError - - db_name = str(c['db_name']) - db_version = str(c['db_version']) - if not db_name in config_files: - config_files[db_name] 
= {} - if db_version in config_files[db_name]: - print(f"Error you are trying to populate duplicate entries for db_name {db_name} and version {db_version}. \ - Manifest only supports distinct db_entries, please resolve duplicates", file=sys.stderr) - sys.exit() - - config_files[db_name][db_version] = { - #'db_relative_path_dir': f"{in_dirname}/{dirname}", - 'db_relative_path_dir': os.path.join(in_dirname, dirname), - #'db_relative_path_config': f"{in_dirname}/{dirname}/config.json", - 'db_relative_path_config': os.path.join(in_dirname, dirname, "config.json"), - } - - with open(os.path.join(input_dir,"manifest.json"),'w' ) as fh: - fh.write(json.dumps(config_files, indent=4)) - - run_data['analysis_end_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") - with open(os.path.join(input_dir,"run.json"),'w' ) as fh: - fh.write(json.dumps(run_data, indent=4)) - - - -def run(cmd_args=None): - #cmd_args = parse_args() - if cmd_args is None: - parser = add_args() - cmd_args = parser.parse_args() - analysis_parameters = vars(cmd_args) - - config = {} - for p in analysis_parameters: - if not p in config: - config[p] = analysis_parameters[p] - - run_merge(config) - - -# call main function -if __name__ == '__main__': - run() +def check_config(directory: pathlib.Path) -> None: + """ + Validate config file in a directory. Throws an error if any required parameters + are missing. + + directory: Path of the directory containing the parent. 
+ """ + + config_dir = pathlib.Path(directory / DBFiles.config_file) + config_data: Union[DBConfig, None] = None + with open(config_dir, 'r') as conf: + config_data = DBConfig(**json.load(conf)) + for k, v in config_data.to_dict().items(): + if v is None or v == '': + raise AttributeError("Config cannot have missing values: {}".format(k)) + return config_data + +def validate_db_files(allele_dir: List[pathlib.Path]) -> List[Tuple[pathlib.Path, DBConfig]]: + """ + Validates a directory of allele databases, and verifies that the config contains + the required fields + + allele_dir List[pathlib.Path: Directory of various allele databases needed by mikrokondo + """ + db_configs: Tuple[pathlib.Path, DBConfig] = [] + for a_dir in allele_dir: + for k, v in DBFiles.items(): + if not pathlib.Path(a_dir / v).exists(): + raise FileNotFoundError("Required file {} does not exist.".format(k)) + db_configs.append((a_dir, check_config(a_dir))) + return db_configs + + +def check_dbs(file_in: pathlib.Path) -> List[pathlib.PosixPath]: + """ + Checks that all locidex databases in a directory are complete. + + file_in: A path to a directory of databases + """ + db_dirs = [p for p in file_in.iterdir() if p.is_dir()] + return db_dirs + +def create_manifest(file_in: pathlib.Path): + """ + Create a manifest file for each of the locidex dbs. 
+ + file_in pathlib.Path: File path to directory of databases + """ + allele_dirs: List[pathlib.Path] = check_dbs(file_in) + validated_dbs: List[Tuple[pathlib.Path, DBConfig]] = validate_db_files(allele_dirs) + db_manifest = dict() + for path, conf in validated_dbs: + if db_manifest.get(conf.db_name) is not None: + raise KeyError("Databases with the same name have been specified (name: {}, path: {})".format(conf.db_name, path)) + + db_manifest[conf.db_name] = { + ManifestFields.db_path: str(path), + ManifestFields.config_data: conf.to_dict() + } + return db_manifest + + +#def run_merge(config): +# analysis_parameters = config +# +# #Input Parameters +# input_dir = config['input'] +# in_dirname = input_dir.split('/')[-1] +# +# run_data = {} +# run_data['analysis_start_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") +# run_data['parameters'] = analysis_parameters +# +# db_keys = DBConfig.keys() +# +# d = pathlib.Path(input_dir).rglob('*') +# config_files = {} +# for item in d: +# if item.is_dir(): +# continue +# fpath = item.resolve() +# dirname = os.path.dirname(fpath).split('/')[-1] +# fname = os.path.basename(item) +# if fname != 'config.json': +# continue +# c = {} +# with open(fpath ,'r') as fh: +# c = json.load(fh) +# if len(c) == 0: +# continue +# for field in db_keys: +# if not field in c: +# print(f'Error db config: {fpath} is missing a needed field key for {field}, please set one', file=sys.stderr) +# raise KeyError +# +# v = c[field] +# if v == '': +# print(f'Error db config: {fpath} is missing a needed field value for {field}, please set one', file=sys.stderr) +# raise KeyError +# +# db_name = str(c['db_name']) +# db_version = str(c['db_version']) +# if not db_name in config_files: +# config_files[db_name] = {} +# if db_version in config_files[db_name]: +# print(f"Error you are trying to populate duplicate entries for db_name {db_name} and version {db_version}. 
\ +# Manifest only supports distinct db_entries, please resolve duplicates", file=sys.stderr) +# sys.exit() +# +# config_files[db_name][db_version] = { +# #'db_relative_path_dir': f"{in_dirname}/{dirname}", +# 'db_relative_path_dir': os.path.join(in_dirname, dirname), +# #'db_relative_path_config': f"{in_dirname}/{dirname}/config.json", +# 'db_relative_path_config': os.path.join(in_dirname, dirname, "config.json"), +# } +# +# with open(os.path.join(input_dir,"manifest.json"),'w' ) as fh: +# fh.write(json.dumps(config_files, indent=4)) +# +# run_data['analysis_end_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") +# with open(os.path.join(input_dir,"run.json"),'w' ) as fh: +# fh.write(json.dumps(run_data, indent=4)) +# +# +# +#def run(cmd_args=None): +# #cmd_args = parse_args() +# if cmd_args is None: +# parser = add_args() +# cmd_args = parser.parse_args() +# analysis_parameters = vars(cmd_args) +# +# +# config = {} +# for p in analysis_parameters: +# if not p in config: +# config[p] = analysis_parameters[p] +# +# run_merge(config) +# +# +## call main function +#if __name__ == '__main__': +# run() diff --git a/tests/test_manifest.py b/tests/test_manifest.py new file mode 100644 index 0000000..5b737e8 --- /dev/null +++ b/tests/test_manifest.py @@ -0,0 +1,94 @@ +""" +Test manifest module +""" + +import pytest +import os +import json +import shutil +from pathlib import PosixPath, Path +from locidex import manifest +from locidex.constants import DBConfig +from dataclasses import dataclass + + +TEST_FAIL_AUTHOR = "locidex/example/manifest_in/fails/fails_author" +TEST_FAIL_DESC = "locidex/example/manifest_in/fails/fails_name" +TEST_PASS_MULTIPLE = "locidex/example/manifest_in/passes/pass_multiple" +TEST_PASS_SINGLE = "locidex/example/manifest_in/passes/pass_single" + +@dataclass +class CMDArgs: + input: os.PathLike + + +def test_db_list(): + """ + List all databases in a test directory + """ + assert manifest.check_dbs(Path(TEST_PASS_MULTIPLE)) == 
[PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_three_db'), + PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_two_db'), + PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_one_db')] + +@pytest.mark.parametrize("input_dir,output", +[ + (TEST_PASS_MULTIPLE, + [ + (PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_three_db'), + DBConfig(db_name='Locidex Database 3', db_version='1.0.0', db_date='04/04/2024', db_author='test1', db_desc='test1', db_num_seqs=53, is_nucl=True, is_prot=True, nucleotide_db_name='nucleotide', protein_db_name='protein')), + (PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_two_db'), + DBConfig(db_name='Locidex Database 2', db_version='1.0.0', db_date='04/04/2024', db_author='test1', db_desc='test1', db_num_seqs=53, is_nucl=True, is_prot=True, nucleotide_db_name='nucleotide', protein_db_name='protein')), + (PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_one_db'), + DBConfig(db_name='Locidex Database 1', db_version='1.0.0', db_date='04/04/2024', db_author='test1', db_desc='test1', db_num_seqs=53, is_nucl=True, is_prot=True, nucleotide_db_name='nucleotide', protein_db_name='protein'))]), + (TEST_PASS_SINGLE, + [(PosixPath('locidex/example/manifest_in/passes/pass_single/pass_one_db'), + DBConfig(db_name='Locidex Database', db_version='1.0.0', db_date='04/04/2024', db_author='test1', db_desc='test1', db_num_seqs=53, is_nucl=True, is_prot=True, nucleotide_db_name='nucleotide', protein_db_name='protein'))]), +]) +def test_pass_validate_db_files(input_dir, output): + dbs = manifest.check_dbs(Path(input_dir)) + assert manifest.validate_db_files(dbs) == output + +def test_fail_validate_db_files_author(capsys): + with pytest.raises(AttributeError): + manifest.check_config(Path(TEST_FAIL_AUTHOR)) + assert "Config cannot have missing values: db_author is empty" == capsys.readouterr() + +def test_fail_validate_db_files_description(capsys): + with 
pytest.raises(AttributeError): + manifest.check_config(Path(TEST_FAIL_AUTHOR)) + assert "Config cannot have missing values: db_desc is empty" == capsys.readouterr() + + +def test_create_manifest_multiple(): + output = {'Locidex Database 3': + {'path': 'locidex/example/manifest_in/passes/pass_multiple/pass_three_db', + 'config': {'db_name': 'Locidex Database 3', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}, + 'Locidex Database 2': + {'path': 'locidex/example/manifest_in/passes/pass_multiple/pass_two_db', + 'config': {'db_name': 'Locidex Database 2', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}, + 'Locidex Database 1': + {'path': 'locidex/example/manifest_in/passes/pass_multiple/pass_one_db', + 'config': {'db_name': 'Locidex Database 1', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}} + assert manifest.create_manifest(Path(TEST_PASS_MULTIPLE)) == output + +def test_create_manifest_single(): + output = {'Locidex Database': + {'path': 'locidex/example/manifest_in/passes/pass_single/pass_one_db', + 'config': {'db_name': 'Locidex Database', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}} + assert manifest.create_manifest(Path(TEST_PASS_SINGLE)) == output + + +#def test_no_db_author(tmpdir, capsys): +# out_dir = os.path.join(tmpdir, "build") +# shutil.copytree(TEST_FAIL_AUTHOR, out_dir) +# with pytest.raises(KeyError): +# 
manifest.run(CMDArgs(input=out_dir)) +# assert "is missing a needed field value for db_desc, please set one" in capsys.readouterr() +# +#def test_no_db_description(tmpdir, capsys): +# outdir = os.path.join(tmpdir, "build") +# shutil.copytree(TEST_FAIL_DESC, outdir) +# with pytest.raises(KeyError): +# manifest.run(CMDArgs(input=outdir)) +# assert "is missing a needed field value for db_author, please set one" in capsys.readouterr() + From 156208fe2b576d6a6368237a8ca95dba8b0936d2 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 30 Apr 2024 09:30:07 -0500 Subject: [PATCH 34/51] refactored merge module --- locidex/manifest.py | 109 ++++++++++------------------------------- tests/test_manifest.py | 19 +++---- 2 files changed, 33 insertions(+), 95 deletions(-) diff --git a/locidex/manifest.py b/locidex/manifest.py index 556cc1b..afb0216 100644 --- a/locidex/manifest.py +++ b/locidex/manifest.py @@ -81,87 +81,32 @@ def create_manifest(file_in: pathlib.Path): return db_manifest -#def run_merge(config): -# analysis_parameters = config -# -# #Input Parameters -# input_dir = config['input'] -# in_dirname = input_dir.split('/')[-1] -# -# run_data = {} -# run_data['analysis_start_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") -# run_data['parameters'] = analysis_parameters -# -# db_keys = DBConfig.keys() -# -# d = pathlib.Path(input_dir).rglob('*') -# config_files = {} -# for item in d: -# if item.is_dir(): -# continue -# fpath = item.resolve() -# dirname = os.path.dirname(fpath).split('/')[-1] -# fname = os.path.basename(item) -# if fname != 'config.json': -# continue -# c = {} -# with open(fpath ,'r') as fh: -# c = json.load(fh) -# if len(c) == 0: -# continue -# for field in db_keys: -# if not field in c: -# print(f'Error db config: {fpath} is missing a needed field key for {field}, please set one', file=sys.stderr) -# raise KeyError -# -# v = c[field] -# if v == '': -# print(f'Error db config: {fpath} is missing a needed field value for {field}, please set 
one', file=sys.stderr) -# raise KeyError -# -# db_name = str(c['db_name']) -# db_version = str(c['db_version']) -# if not db_name in config_files: -# config_files[db_name] = {} -# if db_version in config_files[db_name]: -# print(f"Error you are trying to populate duplicate entries for db_name {db_name} and version {db_version}. \ -# Manifest only supports distinct db_entries, please resolve duplicates", file=sys.stderr) -# sys.exit() -# -# config_files[db_name][db_version] = { -# #'db_relative_path_dir': f"{in_dirname}/{dirname}", -# 'db_relative_path_dir': os.path.join(in_dirname, dirname), -# #'db_relative_path_config': f"{in_dirname}/{dirname}/config.json", -# 'db_relative_path_config': os.path.join(in_dirname, dirname, "config.json"), -# } -# -# with open(os.path.join(input_dir,"manifest.json"),'w' ) as fh: -# fh.write(json.dumps(config_files, indent=4)) -# -# run_data['analysis_end_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") -# with open(os.path.join(input_dir,"run.json"),'w' ) as fh: -# fh.write(json.dumps(run_data, indent=4)) -# -# -# -#def run(cmd_args=None): -# #cmd_args = parse_args() -# if cmd_args is None: -# parser = add_args() -# cmd_args = parser.parse_args() -# analysis_parameters = vars(cmd_args) -# -# -# config = {} -# for p in analysis_parameters: -# if not p in config: -# config[p] = analysis_parameters[p] -# -# run_merge(config) -# -# -## call main function -#if __name__ == '__main__': -# run() +def write_manifest(file_in: pathlib.Path, manifest: Dict[str, Dict[str, Union[str, Dict[str, str]]]]) -> None: + """ + Write the manifest.json file + + file_in Path: Specified input directory + manifest dict: data to write to the manifest + """ + + manifest_file = "manifest.json" + path_out = file_in.joinpath(manifest_file) + with open(path_out, 'w', encoding='utf8') as m_out: + json.dump(manifest, m_out, indent=2) + return path_out + + +def run(cmd_args): + if cmd_args is None: + parser = add_args() + cmd_args = parser.parse_args() + 
directory_in = pathlib.Path(cmd_args.input) + directory_in.exists() + manifest = create_manifest(directory_in) + return write_manifest(directory_in, manifest) + +# call main function +if __name__ == '__main__': + run() diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 5b737e8..fc4b9c3 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -78,17 +78,10 @@ def test_create_manifest_single(): assert manifest.create_manifest(Path(TEST_PASS_SINGLE)) == output -#def test_no_db_author(tmpdir, capsys): -# out_dir = os.path.join(tmpdir, "build") -# shutil.copytree(TEST_FAIL_AUTHOR, out_dir) -# with pytest.raises(KeyError): -# manifest.run(CMDArgs(input=out_dir)) -# assert "is missing a needed field value for db_desc, please set one" in capsys.readouterr() -# -#def test_no_db_description(tmpdir, capsys): -# outdir = os.path.join(tmpdir, "build") -# shutil.copytree(TEST_FAIL_DESC, outdir) -# with pytest.raises(KeyError): -# manifest.run(CMDArgs(input=outdir)) -# assert "is missing a needed field value for db_author, please set one" in capsys.readouterr() +def test_write_manifest(tmpdir): + outdir = tmpdir / "build" + shutil.copytree(TEST_PASS_MULTIPLE, outdir) + cmd_args = CMDArgs(input=TEST_PASS_MULTIPLE) + file_out = manifest.run(cmd_args=cmd_args) + assert file_out.exists() From b1f6814f86e2ff5832b76ed4173dc59b559f8497 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 30 Apr 2024 09:38:44 -0500 Subject: [PATCH 35/51] refactored manifest module --- locidex/manifest.py | 15 ++++++++++++++- tests/test_manifest.py | 19 ++++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/locidex/manifest.py b/locidex/manifest.py index afb0216..8c1d0df 100644 --- a/locidex/manifest.py +++ b/locidex/manifest.py @@ -1,6 +1,7 @@ import pathlib import json from typing import List, Union, Tuple, Dict +from dataclasses import dataclass import os import re import sys @@ -10,6 +11,10 @@ from locidex.constants import DBConfig, DBFiles, 
ManifestFields +@dataclass(frozen=True) +class _Constants: + manifest_name: pathlib.Path = "manifest.json" + def add_args(parser=None): if parser is None: parser = ArgumentParser( @@ -89,7 +94,7 @@ def write_manifest(file_in: pathlib.Path, manifest: Dict[str, Dict[str, Union[st manifest dict: data to write to the manifest """ - manifest_file = "manifest.json" + manifest_file = _Constants.manifest_name path_out = file_in.joinpath(manifest_file) with open(path_out, 'w', encoding='utf8') as m_out: json.dump(manifest, m_out, indent=2) @@ -105,6 +110,14 @@ def run(cmd_args): manifest = create_manifest(directory_in) return write_manifest(directory_in, manifest) +def read_manifest(input_file: pathlib.Path) -> dict: + """ + input_file Path: Manifest file to be parsed + """ + manifest_file = input_file / _Constants.manifest_name + with open(manifest_file, 'r', encoding='utf8') as mani_in: + manifest = json.load(mani_in) + return manifest # call main function if __name__ == '__main__': diff --git a/tests/test_manifest.py b/tests/test_manifest.py index fc4b9c3..f8f5c6b 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -19,7 +19,7 @@ @dataclass class CMDArgs: - input: os.PathLike + input: Path def test_db_list(): @@ -85,3 +85,20 @@ def test_write_manifest(tmpdir): file_out = manifest.run(cmd_args=cmd_args) assert file_out.exists() + +def test_read_manifest(tmpdir): + outdir = tmpdir / "build" + shutil.copytree(TEST_PASS_MULTIPLE, outdir) + cmd_args = CMDArgs(input=Path(TEST_PASS_MULTIPLE)) + file_out = manifest.run(cmd_args=cmd_args) + assert file_out.exists() + output = {'Locidex Database 3': + {'path': 'locidex/example/manifest_in/passes/pass_multiple/pass_three_db', + 'config': {'db_name': 'Locidex Database 3', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}, + 'Locidex Database 2': + {'path': 
'locidex/example/manifest_in/passes/pass_multiple/pass_two_db', + 'config': {'db_name': 'Locidex Database 2', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}, + 'Locidex Database 1': + {'path': 'locidex/example/manifest_in/passes/pass_multiple/pass_one_db', + 'config': {'db_name': 'Locidex Database 1', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}} + assert manifest.read_manifest(cmd_args.input) == output From 1bd9973c5ac050121206d35da1bfbebca86a7919 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 30 Apr 2024 10:00:33 -0500 Subject: [PATCH 36/51] refactored manifest module and tests --- locidex/manifest.py | 46 ++++++++++++++++++++++++++++++++++++++---- tests/test_manifest.py | 7 ++++++- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/locidex/manifest.py b/locidex/manifest.py index 8c1d0df..b762b6d 100644 --- a/locidex/manifest.py +++ b/locidex/manifest.py @@ -11,6 +11,41 @@ from locidex.constants import DBConfig, DBFiles, ManifestFields + +class ManifestItem: + """ + Manifest item created for exporting and importing locidex items + """ + __path_key = 'path' + __config_key = 'config' + + def __init__(self, db_path: pathlib.Path, config: DBConfig): + self.db = db_path + self.config = config + + def to_dict(self): + return {self.__path_key: str(self.db), self.__config_key: self.config.to_dict()} + + def __repr__(self) -> str: + return "Allele location: {}\n Config data: {}".format(self.db, self.config) + + @classmethod + def path_key(cls): + """ + ! Not passing this as a property as apparently that will be deprecated in 3.13 + """ + return cls.__path_key + + @classmethod + def config_key(cls): + """ + ! 
Not passing this as a property as apparently that will be deprecated in 3.13 + """ + return cls.__config_key + + + + @dataclass(frozen=True) class _Constants: manifest_name: pathlib.Path = "manifest.json" @@ -80,13 +115,12 @@ def create_manifest(file_in: pathlib.Path): raise KeyError("Databases with the same name have been specified (name: {}, path: {})".format(conf.db_name, path)) db_manifest[conf.db_name] = { - ManifestFields.db_path: str(path), - ManifestFields.config_data: conf.to_dict() + **ManifestItem(db_path=path, config=conf).to_dict() } return db_manifest -def write_manifest(file_in: pathlib.Path, manifest: Dict[str, Dict[str, Union[str, Dict[str, str]]]]) -> None: +def write_manifest(file_in: pathlib.Path, manifest: Dict[str, ManifestItem]) -> pathlib.Path: """ Write the manifest.json file @@ -115,9 +149,13 @@ def read_manifest(input_file: pathlib.Path) -> dict: input_file Path: Manifest file to be parsed """ manifest_file = input_file / _Constants.manifest_name + manifest_data: Dict[str, ManifestItem] = dict() with open(manifest_file, 'r', encoding='utf8') as mani_in: manifest = json.load(mani_in) - return manifest + for k, v in manifest.items(): + manifest_item = ManifestItem(v[ManifestItem.path_key()], DBConfig(**v[ManifestItem.config_key()])) + manifest_data[k] = manifest_item + return manifest_data # call main function if __name__ == '__main__': diff --git a/tests/test_manifest.py b/tests/test_manifest.py index f8f5c6b..2b7b5a0 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -10,6 +10,7 @@ from locidex import manifest from locidex.constants import DBConfig from dataclasses import dataclass +from typing import Dict TEST_FAIL_AUTHOR = "locidex/example/manifest_in/fails/fails_author" @@ -101,4 +102,8 @@ def test_read_manifest(tmpdir): 'Locidex Database 1': {'path': 'locidex/example/manifest_in/passes/pass_multiple/pass_one_db', 'config': {'db_name': 'Locidex Database 1', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 
'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}} - assert manifest.read_manifest(cmd_args.input) == output + + manifest_data: Dict[str, manifest.ManifestItem] = manifest.read_manifest(cmd_args.input) + for k, v in manifest_data.items(): + comp_data = output[k] + assert v.to_dict() == comp_data From bef005ab23548d9182f357b6045608fd471d1d41 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 30 Apr 2024 10:32:00 -0500 Subject: [PATCH 37/51] refactored manifest module and tests Updated the ManifestItem class to include the root directory passed to it to aid in resolving relative paths --- .gitignore | 3 ++- locidex/build.py | 2 -- locidex/manifest.py | 35 ++++++++++++++++++++++++----------- tests/test_manifest.py | 37 +++++++++++++++++++------------------ 4 files changed, 45 insertions(+), 32 deletions(-) diff --git a/.gitignore b/.gitignore index 9036323..9816aea 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ __pycache__ -*.egg* \ No newline at end of file +*.egg* +.vscode \ No newline at end of file diff --git a/locidex/build.py b/locidex/build.py index 926d9b3..2b2ed69 100644 --- a/locidex/build.py +++ b/locidex/build.py @@ -193,8 +193,6 @@ def run(cmd_args=None): print(f'Error {input_file} does not exist, please check path and try again') sys.exit() - - #run_data['result_file'] = os.path.join(outdir) obj = locidex_build(input_file, outdir,config=config,seq_columns={'nucleotide':'dna_seq','protein':'aa_seq'},force=force) if obj.status == False: diff --git a/locidex/manifest.py b/locidex/manifest.py index b762b6d..c1d7ea3 100644 --- a/locidex/manifest.py +++ b/locidex/manifest.py @@ -19,9 +19,19 @@ class ManifestItem: __path_key = 'path' __config_key = 'config' - def __init__(self, db_path: pathlib.Path, config: DBConfig): - self.db = db_path + def __init__(self, db: pathlib.Path, root_db: pathlib.Path, config: DBConfig): + """ + db Path: Relative path 
to the allele database + root_db Path: The path to the manifest.json file required to resolve paths + config DBConfig: Database configuration data + """ + self.db = db self.config = config + self.root_db = root_db + + @property + def db_path(self): + return self.root_db / self.db def to_dict(self): return {self.__path_key: str(self.db), self.__config_key: self.config.to_dict()} @@ -32,14 +42,14 @@ def __repr__(self) -> str: @classmethod def path_key(cls): """ - ! Not passing this as a property as apparently that will be deprecated in 3.13 + ! Not passing this as a property for the class method as apparently that will be deprecated in 3.13 """ return cls.__path_key @classmethod def config_key(cls): """ - ! Not passing this as a property as apparently that will be deprecated in 3.13 + ! Not passing this as a property for the class method as apparently that will be deprecated in 3.13 """ return cls.__config_key @@ -76,19 +86,20 @@ def check_config(directory: pathlib.Path) -> None: raise AttributeError("Config cannot have missing values: {}".format(k)) return config_data -def validate_db_files(allele_dir: List[pathlib.Path]) -> List[Tuple[pathlib.Path, DBConfig]]: +def validate_db_files(allele_dir: List[pathlib.Path], file_in: pathlib.Path) -> List[Tuple[pathlib.Path, DBConfig]]: """ Validates a directory of allele databases, and verifies that the config contains the required fields - allele_dir List[pathlib.Path: Directory of various allele databases needed by mikrokondo + allele_dir List[pathlib.Path: Directory of various allele databases needed by locidex + file_in Path: Root directory to set files relative too """ db_configs: Tuple[pathlib.Path, DBConfig] = [] for a_dir in allele_dir: for k, v in DBFiles.items(): if not pathlib.Path(a_dir / v).exists(): raise FileNotFoundError("Required file {} does not exist.".format(k)) - db_configs.append((a_dir, check_config(a_dir))) + db_configs.append((a_dir.relative_to(file_in), check_config(a_dir))) return db_configs @@ 
-108,14 +119,13 @@ def create_manifest(file_in: pathlib.Path): file_in pathlib.Path: File path to directory of databases """ allele_dirs: List[pathlib.Path] = check_dbs(file_in) - validated_dbs: List[Tuple[pathlib.Path, DBConfig]] = validate_db_files(allele_dirs) + validated_dbs: List[Tuple[pathlib.Path, DBConfig]] = validate_db_files(allele_dirs, file_in) db_manifest = dict() for path, conf in validated_dbs: if db_manifest.get(conf.db_name) is not None: raise KeyError("Databases with the same name have been specified (name: {}, path: {})".format(conf.db_name, path)) - db_manifest[conf.db_name] = { - **ManifestItem(db_path=path, config=conf).to_dict() + **ManifestItem(db=path, config=conf, root_db=file_in).to_dict() } return db_manifest @@ -148,12 +158,15 @@ def read_manifest(input_file: pathlib.Path) -> dict: """ input_file Path: Manifest file to be parsed """ + if not input_file.is_dir(): + raise AssertionError("Allele database directory must be passed directly.") + manifest_file = input_file / _Constants.manifest_name manifest_data: Dict[str, ManifestItem] = dict() with open(manifest_file, 'r', encoding='utf8') as mani_in: manifest = json.load(mani_in) for k, v in manifest.items(): - manifest_item = ManifestItem(v[ManifestItem.path_key()], DBConfig(**v[ManifestItem.config_key()])) + manifest_item = ManifestItem(db=v[ManifestItem.path_key()], config=DBConfig(**v[ManifestItem.config_key()]), root_db=input_file) manifest_data[k] = manifest_item return manifest_data diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 2b7b5a0..9364263 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -35,19 +35,20 @@ def test_db_list(): [ (TEST_PASS_MULTIPLE, [ - (PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_three_db'), + (PosixPath('pass_three_db'), DBConfig(db_name='Locidex Database 3', db_version='1.0.0', db_date='04/04/2024', db_author='test1', db_desc='test1', db_num_seqs=53, is_nucl=True, is_prot=True, 
nucleotide_db_name='nucleotide', protein_db_name='protein')), - (PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_two_db'), + (PosixPath('pass_two_db'), DBConfig(db_name='Locidex Database 2', db_version='1.0.0', db_date='04/04/2024', db_author='test1', db_desc='test1', db_num_seqs=53, is_nucl=True, is_prot=True, nucleotide_db_name='nucleotide', protein_db_name='protein')), - (PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_one_db'), + (PosixPath('pass_one_db'), DBConfig(db_name='Locidex Database 1', db_version='1.0.0', db_date='04/04/2024', db_author='test1', db_desc='test1', db_num_seqs=53, is_nucl=True, is_prot=True, nucleotide_db_name='nucleotide', protein_db_name='protein'))]), (TEST_PASS_SINGLE, - [(PosixPath('locidex/example/manifest_in/passes/pass_single/pass_one_db'), + [(PosixPath('pass_one_db'), DBConfig(db_name='Locidex Database', db_version='1.0.0', db_date='04/04/2024', db_author='test1', db_desc='test1', db_num_seqs=53, is_nucl=True, is_prot=True, nucleotide_db_name='nucleotide', protein_db_name='protein'))]), ]) def test_pass_validate_db_files(input_dir, output): - dbs = manifest.check_dbs(Path(input_dir)) - assert manifest.validate_db_files(dbs) == output + input_path = Path(input_dir) + dbs = manifest.check_dbs(input_path) + assert manifest.validate_db_files(dbs, input_path) == output def test_fail_validate_db_files_author(capsys): with pytest.raises(AttributeError): @@ -62,19 +63,19 @@ def test_fail_validate_db_files_description(capsys): def test_create_manifest_multiple(): output = {'Locidex Database 3': - {'path': 'locidex/example/manifest_in/passes/pass_multiple/pass_three_db', + {'path': 'pass_three_db', 'config': {'db_name': 'Locidex Database 3', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}, 'Locidex Database 2': - {'path': 
'locidex/example/manifest_in/passes/pass_multiple/pass_two_db', + {'path': 'pass_two_db', 'config': {'db_name': 'Locidex Database 2', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}, 'Locidex Database 1': - {'path': 'locidex/example/manifest_in/passes/pass_multiple/pass_one_db', + {'path': 'pass_one_db', 'config': {'db_name': 'Locidex Database 1', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}} assert manifest.create_manifest(Path(TEST_PASS_MULTIPLE)) == output def test_create_manifest_single(): output = {'Locidex Database': - {'path': 'locidex/example/manifest_in/passes/pass_single/pass_one_db', + {'path': 'pass_one_db', 'config': {'db_name': 'Locidex Database', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}} assert manifest.create_manifest(Path(TEST_PASS_SINGLE)) == output @@ -82,28 +83,28 @@ def test_create_manifest_single(): def test_write_manifest(tmpdir): outdir = tmpdir / "build" shutil.copytree(TEST_PASS_MULTIPLE, outdir) - cmd_args = CMDArgs(input=TEST_PASS_MULTIPLE) + cmd_args = CMDArgs(input=outdir) file_out = manifest.run(cmd_args=cmd_args) assert file_out.exists() def test_read_manifest(tmpdir): - outdir = tmpdir / "build" + outdir = Path(tmpdir / "build") shutil.copytree(TEST_PASS_MULTIPLE, outdir) - cmd_args = CMDArgs(input=Path(TEST_PASS_MULTIPLE)) + cmd_args = CMDArgs(input=outdir) file_out = manifest.run(cmd_args=cmd_args) assert file_out.exists() output = {'Locidex Database 3': - {'path': 'locidex/example/manifest_in/passes/pass_multiple/pass_three_db', + {'path': 
'pass_three_db', 'config': {'db_name': 'Locidex Database 3', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}, 'Locidex Database 2': - {'path': 'locidex/example/manifest_in/passes/pass_multiple/pass_two_db', + {'path': 'pass_two_db', 'config': {'db_name': 'Locidex Database 2', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}, 'Locidex Database 1': - {'path': 'locidex/example/manifest_in/passes/pass_multiple/pass_one_db', + {'path': 'pass_one_db', 'config': {'db_name': 'Locidex Database 1', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}} - - manifest_data: Dict[str, manifest.ManifestItem] = manifest.read_manifest(cmd_args.input) + + manifest_data: Dict[str, manifest.ManifestItem] = manifest.read_manifest(outdir) for k, v in manifest_data.items(): comp_data = output[k] assert v.to_dict() == comp_data From b976d92f4648fa25b807b1150264d7af3662a55b Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 30 Apr 2024 13:49:34 -0500 Subject: [PATCH 38/51] updated manifest to allow for multiple versions of different dbs --- locidex/constants.py | 7 +++++++ locidex/manifest.py | 31 ++++++++++++++++++------------- locidex/search.py | 20 ++++++++++++++++---- tests/test_manifest.py | 31 ++++++++++++++++--------------- 4 files changed, 57 insertions(+), 32 deletions(-) diff --git a/locidex/constants.py b/locidex/constants.py index 49f0f8e..b73c7e0 100644 --- a/locidex/constants.py +++ b/locidex/constants.py @@ -64,6 +64,12 @@ EXTRACT_MODES = ['snps','trim','raw','extend'] +# Manifest opts for parsing + 
+OPTION_GROUPS = { + "db_group": ["db_name"], +} + @dataclass class DBConfig: db_name: Union[str, None] = None @@ -93,6 +99,7 @@ def _keys(cls) -> list: def keys(self) -> list: return [i.name for i in fields(self)] + @dataclass(frozen=True) class DBFiles: meta_file: str = "meta.json" diff --git a/locidex/manifest.py b/locidex/manifest.py index c1d7ea3..7288056 100644 --- a/locidex/manifest.py +++ b/locidex/manifest.py @@ -22,7 +22,7 @@ class ManifestItem: def __init__(self, db: pathlib.Path, root_db: pathlib.Path, config: DBConfig): """ db Path: Relative path to the allele database - root_db Path: The path to the manifest.json file required to resolve paths + root_db Path: The directory containing the manifest.json file required to resolve paths config DBConfig: Database configuration data """ self.db = db @@ -52,8 +52,6 @@ def config_key(cls): ! Not passing this as a property for the class method as apparently that will be deprecated in 3.13 """ return cls.__config_key - - @dataclass(frozen=True) @@ -112,7 +110,7 @@ def check_dbs(file_in: pathlib.Path) -> List[pathlib.PosixPath]: db_dirs = [p for p in file_in.iterdir() if p.is_dir()] return db_dirs -def create_manifest(file_in: pathlib.Path): +def create_manifest(file_in: pathlib.Path) -> Dict[str, List[Dict[str, str]]]: """ Create a manifest file for each of the locidex dbs. 
@@ -122,15 +120,19 @@ def create_manifest(file_in: pathlib.Path): validated_dbs: List[Tuple[pathlib.Path, DBConfig]] = validate_db_files(allele_dirs, file_in) db_manifest = dict() for path, conf in validated_dbs: - if db_manifest.get(conf.db_name) is not None: - raise KeyError("Databases with the same name have been specified (name: {}, path: {})".format(conf.db_name, path)) - db_manifest[conf.db_name] = { - **ManifestItem(db=path, config=conf, root_db=file_in).to_dict() - } + + if db_manifest.get(conf.db_name) is None: + db_manifest[conf.db_name] = [] + + if db_manifest[conf.db_name] and (versions := [i.db_version for i in db_manifest[conf.db_name]]): + if conf.db_version in versions: + raise KeyError("Databases with the same name and version have been specified (name: {}, path: {}, version: {})".format(conf.db_name, path, conf.db_version)) + + db_manifest[conf.db_name].append(ManifestItem(db=path, config=conf, root_db=file_in).to_dict()) return db_manifest -def write_manifest(file_in: pathlib.Path, manifest: Dict[str, ManifestItem]) -> pathlib.Path: +def write_manifest(file_in: pathlib.Path, manifest: Dict[str, List[Dict[str, str]]]) -> pathlib.Path: """ Write the manifest.json file @@ -165,9 +167,12 @@ def read_manifest(input_file: pathlib.Path) -> dict: manifest_data: Dict[str, ManifestItem] = dict() with open(manifest_file, 'r', encoding='utf8') as mani_in: manifest = json.load(mani_in) - for k, v in manifest.items(): - manifest_item = ManifestItem(db=v[ManifestItem.path_key()], config=DBConfig(**v[ManifestItem.config_key()]), root_db=input_file) - manifest_data[k] = manifest_item + for k, list_manifests in manifest.items(): + if manifest_data.get(k) is None: + manifest_data[k] = [] + for v in list_manifests: + manifest_item = ManifestItem(db=v[ManifestItem.path_key()], config=DBConfig(**v[ManifestItem.config_key()]), root_db=input_file) + manifest_data[k].append(manifest_item) return manifest_data # call main function diff --git a/locidex/search.py 
b/locidex/search.py index f4c1213..6ee3f5a 100644 --- a/locidex/search.py +++ b/locidex/search.py @@ -11,7 +11,7 @@ from locidex.classes.blast import blast_search, parse_blast from locidex.classes.db import search_db_conf, db_config from locidex.classes.seq_intake import seq_intake, seq_store -from locidex.constants import SEARCH_RUN_DATA, FILE_TYPES, BLAST_TABLE_COLS, DB_CONFIG_FIELDS, DB_EXPECTED_FILES +from locidex.constants import SEARCH_RUN_DATA, FILE_TYPES, BLAST_TABLE_COLS, DB_EXPECTED_FILES, OPTION_GROUPS from locidex.utils import write_seq_dict from locidex.version import __version__ @@ -21,10 +21,13 @@ def add_args(parser=None): description="Locidex: Advanced searching and filtering of sequence databases using query sequences",) parser.add_argument('-q','--query', type=str, required=True,help='Query sequence file') parser.add_argument('-o', '--outdir', type=str, required=True, help='Output directory to put results') + group = parser.add_mutually_exclusive_group() parser.add_argument('-n', '--name', type=str, required=False, help='Sample name to include default=filename') - parser.add_argument('-d', '--db', type=str, required=False, help='Locidex database directory') + #parser.add_argument('-d', '--db', type=str, required=False, help='Locidex database directory') + group.add_argument('-d', '--db', type=str, required=False, help='Locidex database directory') parser.add_argument('-c', '--config', type=str, required=False, help='Locidex parameter config file (json)') parser.add_argument('--db_name', type=str, required=False, help='Name of database to perform search, used when a manifest is specified as a db') + group.add_argument("--db_group", type=str, required=False, help="A directory of databases containing a manifest file. 
Requires the db_name option to be set to select the correct db") parser.add_argument('--db_version', type=str, required=False, help='Version of database to perform search, used when a manifest is specified as a db') parser.add_argument('--min_evalue', type=float, required=False, help='Minumum evalue required for match', default=0.0001) @@ -166,7 +169,7 @@ def run_search(config): # Validate database is valid - db_database_config = search_db_conf(db_dir, DB_EXPECTED_FILES, DB_CONFIG_FIELDS) + db_database_config = search_db_conf(db_dir, DB_EXPECTED_FILES, config.to_dict().keys()) if db_database_config.status == False: print(f'There is an issue with provided db directory: {db_dir}\n {db_database_config.messages}') sys.exit() @@ -247,7 +250,7 @@ def run_search(config): } store_obj = seq_store(sample_name, db_database_config.config_obj.config, metadata_obj.config['meta'], seq_obj.seq_data, BLAST_TABLE_COLS, hit_filters) - print(store_obj.record) + for db_label in blast_database_paths: label_col = 'index' if db_label == 'nucleotide': @@ -300,6 +303,15 @@ def run(cmd_args=None): parser = add_args() cmd_args = parser.parse_args() analysis_parameters = vars(cmd_args) + + + for opt in OPTION_GROUPS: + if analysis_parameters[opt] is not None: + for option in analysis_parameters: + if analysis_parameters[option] is None: + parser.error("Missing required parameter: {}".format(option)) + + config_file = cmd_args.config config = {} diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 9364263..156000b 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -62,21 +62,21 @@ def test_fail_validate_db_files_description(capsys): def test_create_manifest_multiple(): - output = {'Locidex Database 3': + output = {'Locidex Database 3': [ {'path': 'pass_three_db', - 'config': {'db_name': 'Locidex Database 3', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 
'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}, - 'Locidex Database 2': + 'config': {'db_name': 'Locidex Database 3', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}], + 'Locidex Database 2': [ {'path': 'pass_two_db', - 'config': {'db_name': 'Locidex Database 2', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}, - 'Locidex Database 1': + 'config': {'db_name': 'Locidex Database 2', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}], + 'Locidex Database 1': [ {'path': 'pass_one_db', - 'config': {'db_name': 'Locidex Database 1', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}} + 'config': {'db_name': 'Locidex Database 1', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}]} assert manifest.create_manifest(Path(TEST_PASS_MULTIPLE)) == output def test_create_manifest_single(): - output = {'Locidex Database': + output = {'Locidex Database': [ {'path': 'pass_one_db', - 'config': {'db_name': 'Locidex Database', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}} + 'config': {'db_name': 'Locidex Database', 
'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}] } assert manifest.create_manifest(Path(TEST_PASS_SINGLE)) == output @@ -94,17 +94,18 @@ def test_read_manifest(tmpdir): cmd_args = CMDArgs(input=outdir) file_out = manifest.run(cmd_args=cmd_args) assert file_out.exists() - output = {'Locidex Database 3': + output = {'Locidex Database 3': [ {'path': 'pass_three_db', - 'config': {'db_name': 'Locidex Database 3', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}, - 'Locidex Database 2': + 'config': {'db_name': 'Locidex Database 3', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}], + 'Locidex Database 2': [ {'path': 'pass_two_db', - 'config': {'db_name': 'Locidex Database 2', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}, + 'config': {'db_name': 'Locidex Database 2', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}], 'Locidex Database 1': - {'path': 'pass_one_db', - 'config': {'db_name': 'Locidex Database 1', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}} + [ {'path': 'pass_one_db', + 'config': {'db_name': 'Locidex 
Database 1', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}}]} manifest_data: Dict[str, manifest.ManifestItem] = manifest.read_manifest(outdir) for k, v in manifest_data.items(): comp_data = output[k] - assert v.to_dict() == comp_data + v = [i.to_dict() for i in v] + assert v == comp_data From 69131cc5a7c05da977ef599c0ca21aab2d5a76d1 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 30 Apr 2024 15:31:18 -0500 Subject: [PATCH 39/51] updated extract, build, search classes --- locidex/build.py | 2 +- locidex/constants.py | 5 +- locidex/example/manifest_out/manifest.json | 55 ++++++++++++++++++++-- locidex/extract.py | 25 ++++++++-- locidex/manifest.py | 27 ++++++++++- locidex/search.py | 11 ++--- tests/test_manifest.py | 12 +++++ tests/test_workflows.yml | 8 ++++ 8 files changed, 126 insertions(+), 19 deletions(-) create mode 100644 tests/test_workflows.yml diff --git a/locidex/build.py b/locidex/build.py index 2b2ed69..a810409 100644 --- a/locidex/build.py +++ b/locidex/build.py @@ -22,7 +22,7 @@ class locidex_build: messages = [] - def __init__(self, input_file: os.PathLike, outdir: os.PathLike, config: DBConfig,seq_columns={'nucleotide':'dna_seq','protein':'aa_seq'},force=False,parse_seqids=False): + def __init__(self, input_file: os.PathLike, outdir: os.PathLike, config: DBConfig, seq_columns={'nucleotide':'dna_seq','protein':'aa_seq'},force=False,parse_seqids=False): self.input_file = input_file self.outdir = outdir self.force = force diff --git a/locidex/constants.py b/locidex/constants.py index b73c7e0..ff8541b 100644 --- a/locidex/constants.py +++ b/locidex/constants.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, asdict, fields import pathlib -from typing import Any, Union, NamedTuple +from typing import Any, Union DNA_AMBIG_CHARS = ['b', 'd', 'e', 'f', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 
'o', 'p', 'q', 'r', 's', 'u', 'v', 'w', 'x', 'y', 'z', '-'] @@ -66,8 +66,9 @@ # Manifest opts for parsing + OPTION_GROUPS = { - "db_group": ["db_name"], + "db_group": ["db_name", "db_version"], } @dataclass diff --git a/locidex/example/manifest_out/manifest.json b/locidex/example/manifest_out/manifest.json index c4376a8..04b4f47 100644 --- a/locidex/example/manifest_out/manifest.json +++ b/locidex/example/manifest_out/manifest.json @@ -1,8 +1,53 @@ { - "Locidex Database": { - "1.0.0": { - "db_relative_path_dir": "/build_db_mlst_out", - "db_relative_path_config": "/build_db_mlst_out/config.json" - } + "Locidex Database 3": [ + { + "path": "pass_three_db", + "config": { + "db_name": "Locidex Database 3", + "db_version": "1.0.0", + "db_date": "04/04/2024", + "db_author": "test1", + "db_desc": "test1", + "db_num_seqs": 53, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + } } + ], + "Locidex Database 2": [ + { + "path": "pass_two_db", + "config": { + "db_name": "Locidex Database 2", + "db_version": "1.0.0", + "db_date": "04/04/2024", + "db_author": "test1", + "db_desc": "test1", + "db_num_seqs": 53, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + } + } + ], + "Locidex Database 1": [ + { + "path": "pass_one_db", + "config": { + "db_name": "Locidex Database 1", + "db_version": "1.0.0", + "db_date": "04/04/2024", + "db_author": "test1", + "db_desc": "test1", + "db_num_seqs": 53, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + } + } + ] } \ No newline at end of file diff --git a/locidex/extract.py b/locidex/extract.py index d2d46e2..21e3dda 100644 --- a/locidex/extract.py +++ b/locidex/extract.py @@ -12,9 +12,10 @@ from locidex.classes.blast import blast_search, parse_blast from locidex.classes.db import search_db_conf, db_config from locidex.classes.seq_intake import seq_intake, seq_store -from 
locidex.constants import SEARCH_RUN_DATA, FILE_TYPES, BLAST_TABLE_COLS, DB_CONFIG_FIELDS, DB_EXPECTED_FILES, NT_SUB, EXTRACT_MODES +from locidex.constants import SEARCH_RUN_DATA, FILE_TYPES, BLAST_TABLE_COLS, DBConfig, DB_EXPECTED_FILES, NT_SUB, EXTRACT_MODES, OPTION_GROUPS from locidex.version import __version__ from locidex.classes.aligner import perform_alignment, aligner +import locidex.manifest as manifest def add_args(parser=None): if parser is None: @@ -24,7 +25,11 @@ def add_args(parser=None): parser.add_argument('-i','--in_fasta', type=str, required=True,help='Query assembly sequence file (fasta)') parser.add_argument('-o', '--outdir', type=str, required=True, help='Output directory to put results') parser.add_argument('-n', '--name', type=str, required=False, help='Sample name to include default=filename') - parser.add_argument('-d', '--db', type=str, required=False, help='Locidex database directory') + group = parser.add_mutually_exclusive_group() + group.add_argument('-d', '--db', type=str, required=False, help='Locidex database directory') + group.add_argument("--db_group", type=str, required=False, help="A directory of databases containing a manifest file. 
Requires the db_name option to be set to select the correct db") + parser.add_argument('--db_name', type=str, required=False, help='Name of database to perform search, used when a manifest is specified as a db') + parser.add_argument('--db_version', type=str, required=False, help='Version of database to perform search, used when a manifest is specified as a db') parser.add_argument('-c', '--config', type=str, required=False, help='Locidex parameter config file (json)') parser.add_argument('--min_evalue', type=float, required=False, help='Minumum evalue required for match', default=0.0001) @@ -117,7 +122,7 @@ def run_extract(config): seq_obj = seq_intake(input_fasta, format, 'source', translation_table, perform_annotation=False,skip_trans=True) # Validate database is valid - db_database_config = search_db_conf(db_dir, DB_EXPECTED_FILES, DB_CONFIG_FIELDS) + db_database_config = search_db_conf(db_dir, DB_EXPECTED_FILES, DBConfig._keys()) if db_database_config.status == False: print(f'There is an issue with provided db directory: {db_dir}\n {db_database_config.messages}') sys.exit() @@ -255,8 +260,20 @@ def run(cmd_args=None): if cmd_args is None: parser = add_args() cmd_args = parser.parser_args() - #cmd_args = parse_args() + analysis_parameters = vars(cmd_args) + + for opt in OPTION_GROUPS: + if analysis_parameters[opt] is not None: + for option in analysis_parameters: + if analysis_parameters[option] is None: + parser.error("Missing required parameter: {}".format(option)) + + if cmd_args.db_group is not None: + analysis_parameters.db = manifest.get_manifest_db(input_file=Path(cmd_args.db_group), name=cmd_args.db_name, version=cmd_args.db_version) + + + config_file = cmd_args.config config = {} diff --git a/locidex/manifest.py b/locidex/manifest.py index 7288056..6929172 100644 --- a/locidex/manifest.py +++ b/locidex/manifest.py @@ -8,7 +8,8 @@ from argparse import (ArgumentParser, ArgumentDefaultsHelpFormatter, RawDescriptionHelpFormatter) from datetime import 
datetime from locidex.version import __version__ -from locidex.constants import DBConfig, DBFiles, ManifestFields +from locidex.constants import DBConfig, DBFiles + @@ -156,6 +157,25 @@ def run(cmd_args): manifest = create_manifest(directory_in) return write_manifest(directory_in, manifest) +def select_db(manifest_data: Dict[str, List[ManifestItem]], name: str, version: str): + """ + Select a locidex database from the manifest file provided. + + manifest_data Dict[str, List[ManifestItem]]: Parsed manifest file data for selecting a database + name str: Name of database to select + version str: version of selected database to select + """ + db_data = manifest_data.get(name) + if db_data is None: + raise KeyError("Could not find database with specified name: {}".format(name)) + + try: + db = next(filter(lambda x: x.config.db_version == version, db_data)) + except StopIteration: + raise ValueError("No database entry with version: {}".format(version)) + + return db + def read_manifest(input_file: pathlib.Path) -> dict: """ input_file Path: Manifest file to be parsed @@ -175,6 +195,11 @@ def read_manifest(input_file: pathlib.Path) -> dict: manifest_data[k].append(manifest_item) return manifest_data +def get_manifest_db(input_file: pathlib.Path, name: str, version: str): + output = read_manifest(input_file) + db_out = select_db(output, name, version) + return db_out.db_path + # call main function if __name__ == '__main__': run() diff --git a/locidex/search.py b/locidex/search.py index 6ee3f5a..3971861 100644 --- a/locidex/search.py +++ b/locidex/search.py @@ -8,6 +8,7 @@ import pandas as pd +import locidex.manifest as manifest from locidex.classes.blast import blast_search, parse_blast from locidex.classes.db import search_db_conf, db_config from locidex.classes.seq_intake import seq_intake, seq_store @@ -22,12 +23,11 @@ def add_args(parser=None): parser.add_argument('-q','--query', type=str, required=True,help='Query sequence file') parser.add_argument('-o', '--outdir', 
type=str, required=True, help='Output directory to put results') group = parser.add_mutually_exclusive_group() - parser.add_argument('-n', '--name', type=str, required=False, help='Sample name to include default=filename') - #parser.add_argument('-d', '--db', type=str, required=False, help='Locidex database directory') group.add_argument('-d', '--db', type=str, required=False, help='Locidex database directory') + group.add_argument("--db_group", type=str, required=False, help="A directory of databases containing a manifest file. Requires the db_name option to be set to select the correct db") + parser.add_argument('-n', '--name', type=str, required=False, help='Sample name to include default=filename') parser.add_argument('-c', '--config', type=str, required=False, help='Locidex parameter config file (json)') parser.add_argument('--db_name', type=str, required=False, help='Name of database to perform search, used when a manifest is specified as a db') - group.add_argument("--db_group", type=str, required=False, help="A directory of databases containing a manifest file. 
Requires the db_name option to be set to select the correct db") parser.add_argument('--db_version', type=str, required=False, help='Version of database to perform search, used when a manifest is specified as a db') parser.add_argument('--min_evalue', type=float, required=False, help='Minumum evalue required for match', default=0.0001) @@ -295,8 +295,6 @@ def run_search(config): fh.write(json.dumps(run_data, indent=4)) - - def run(cmd_args=None): #cmd_args = parse_args() if cmd_args is None: @@ -304,13 +302,14 @@ def run(cmd_args=None): cmd_args = parser.parse_args() analysis_parameters = vars(cmd_args) - for opt in OPTION_GROUPS: if analysis_parameters[opt] is not None: for option in analysis_parameters: if analysis_parameters[option] is None: parser.error("Missing required parameter: {}".format(option)) + if cmd_args.db_group is not None: + analysis_parameters.db = manifest.get_manifest_db(input_file=Path(cmd_args.db_group), name=cmd_args.db_name, version=cmd_args.db_version) config_file = cmd_args.config diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 156000b..ae2307a 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -109,3 +109,15 @@ def test_read_manifest(tmpdir): comp_data = output[k] v = [i.to_dict() for i in v] assert v == comp_data + + +def test_select_db(tmpdir): + outdir = Path(tmpdir / "build") + shutil.copytree(TEST_PASS_MULTIPLE, outdir) + cmd_args = CMDArgs(input=outdir) + file_out = manifest.run(cmd_args=cmd_args) + assert file_out.exists() + db_out = {'path': 'pass_three_db', + 'config': {'db_name': 'Locidex Database 3', 'db_version': '1.0.0', 'db_date': '04/04/2024', 'db_author': 'test1', 'db_desc': 'test1', 'db_num_seqs': 53, 'is_nucl': True, 'is_prot': True, 'nucleotide_db_name': 'nucleotide', 'protein_db_name': 'protein'}} + manifest_data: Dict[str, manifest.ManifestItem] = manifest.read_manifest(outdir) + assert manifest.select_db(manifest_data, "Locidex Database 3", "1.0.0").to_dict() == db_out diff --git 
a/tests/test_workflows.yml b/tests/test_workflows.yml new file mode 100644 index 0000000..ad5f2f2 --- /dev/null +++ b/tests/test_workflows.yml @@ -0,0 +1,8 @@ +- name: Run help + command: locidex search --help +- name: Run search help + command: locidex search --help +- name: Run build help + command: locidex build --help +- name: Run extract help + command: locidex extract --help \ No newline at end of file From 03cbdd61bdf26bf0e393855de1dfcd93976a59ba Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 30 Apr 2024 16:08:44 -0500 Subject: [PATCH 40/51] updated tests --- .../blast/nucleotide/nucleotide.nin | Bin 800 -> 792 bytes .../blast/nucleotide/nucleotide.njs | 8 +++---- .../blast/protein/protein.pdb | Bin 0 -> 20480 bytes .../blast/protein/protein.phr | Bin 0 -> 3435 bytes .../blast/protein/protein.pin | Bin 0 -> 568 bytes .../blast/protein/protein.pjs | 22 ++++++++++++++++++ .../blast/protein/protein.pot | Bin 0 -> 644 bytes .../blast/protein/protein.psq | Bin 0 -> 8401 bytes .../blast/protein/protein.ptf | Bin 0 -> 16384 bytes .../blast/protein/protein.pto | Bin 0 -> 216 bytes locidex/example/build_db_mlst_out/config.json | 2 +- .../example/build_db_mlst_out/results.json | 11 ++++----- locidex/search.py | 4 ++-- tests/test_workflows.yml | 5 +++- 14 files changed, 38 insertions(+), 14 deletions(-) create mode 100644 locidex/example/build_db_mlst_out/blast/protein/protein.pdb create mode 100644 locidex/example/build_db_mlst_out/blast/protein/protein.phr create mode 100644 locidex/example/build_db_mlst_out/blast/protein/protein.pin create mode 100644 locidex/example/build_db_mlst_out/blast/protein/protein.pjs create mode 100644 locidex/example/build_db_mlst_out/blast/protein/protein.pot create mode 100644 locidex/example/build_db_mlst_out/blast/protein/protein.psq create mode 100644 locidex/example/build_db_mlst_out/blast/protein/protein.ptf create mode 100644 locidex/example/build_db_mlst_out/blast/protein/protein.pto diff --git 
a/locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.nin b/locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.nin index d06b8573e036660d656bdf0bf4f3e4a6c8292aa9..4b09f49dafec1cf5d8fd80ca5bc3fb4ef51a6130 100644 GIT binary patch delta 86 zcmZ3$HiONWfq{V)h+)80Pd_I=IWr}-LO-=4F}ENmRX?dTGbbfJB`H2Pr?@0OzqDkc hnYoy#V?mLEv4M_)k%5tkf`W;afr&zZ@5YW9OaOo17U2K@ delta 94 zcmbQiwt&ryfq{V)h+)7(za+Onzo4=twYWq#KTS8cJT)h$7$#t0q@PrpnUi8L(b!yC f%CVqG!9+*F$iT=%LBYt%&`2S`7h=xFh8au%D6|(7 diff --git a/locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.njs b/locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.njs index 6ce489d..2819c06 100644 --- a/locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.njs +++ b/locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.njs @@ -3,13 +3,13 @@ "dbname": "nucleotide", "dbtype": "Nucleotide", "db-version": 5, - "description": "/tmp/pytest-of-mwells/pytest-82/build0/blast/nucleotide/nucleotide.fasta", + "description": "./locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.fasta", "number-of-letters": 25041, "number-of-sequences": 53, - "last-updated": "2024-04-04T14:12:00", + "last-updated": "2024-04-30T16:04:00", "number-of-volumes": 1, - "bytes-total": 48256, - "bytes-to-cache": 7097, + "bytes-total": 48248, + "bytes-to-cache": 7089, "files": [ "nucleotide.ndb", "nucleotide.nhr", diff --git a/locidex/example/build_db_mlst_out/blast/protein/protein.pdb b/locidex/example/build_db_mlst_out/blast/protein/protein.pdb new file mode 100644 index 0000000000000000000000000000000000000000..707749d104c55e8225559067c426a6ab6375f9c0 GIT binary patch literal 20480 zcmeI%K}y3w6adg~6q;Q`yo0!N3bORK$56a26G33lxW~VPj%q;tg;sy%comRr(uEevFOhThS)()6H3s z6>seN|G!V$0yw3B1Wqd;f-?%p;H&~dIH!OV&MP2> z3kt~Lq5^`rq<|zYBhZn=6$K=5RRKv{Q$P~e6_CUY1tf7(0ZD8qAckiNjy+M5)T!S#HIq0c%*DN~XSsi(_pTJeckB)ViqmqIfMD{6)p zVpb#=2?uAa`D8G@kTkQ;0HRO88^*l+nE0WQGhi?)wTTNJwk#BTooH4p9;?y@ZRlXt;Imt1zmfvc{$?uMIgx$TZacinU3 
wz6Tz9Z*opT}eelsIpMCMwH{bp6(=WgMapr&Q3tcn?%>V!Z literal 0 HcmV?d00001 diff --git a/locidex/example/build_db_mlst_out/blast/protein/protein.psq b/locidex/example/build_db_mlst_out/blast/protein/protein.psq new file mode 100644 index 0000000000000000000000000000000000000000..d48794c953e7b23e4dfd363f7876f1dcbf238cdb GIT binary patch literal 8401 zcmdU!+j8SL3`BzxNsxqi0pZgBf9kemJC<)5&y16)t=fIbIPr0#8v@{u9D3zQZW&8# zd132r>CD_!srx~5HzKA)IWN+h51hPpT14B-&M3KT?ofR4rMM8L>I^?23eHRFUUK&A zG%@AW%xJJxV_{z{xGs)dqDV4pDrA|>Cw3g1S#O!O#qO$%+W5mRoPOn$uQ&|9F zG7o>!GyjR6_*)P4e5vk#rl-5-3}5-PwTv9u#<}GP{-o8P?z8TMXyl`w@aJ9K-KOVZ zr{|L#<-I@Yp7ZM*t(l+0RBDcFvQ<4Vte z&RL(--Cft6#D1-(e~UjK=$SS>ANq60nYJ7aa5_B6zdc6}Io%6&UpYze!q}d3j<@Cg zmXl^oCJlAp`BOgg?iBt^?L$fMc*4Uk0JTN35zwkVMx(}A4;A=*36!{xK|&ITDTk8c zQpI(j(2hhJ+LyGFh%-iygw|IuabaL$KV(p6!W0!IKP{~*^E2fnagiI?}B|q@30#=$);{9ne;?zACf=*=FnMS|NBom9Z-s{>ap_ znM_)1O@#Sc7J`i?w&qhP#!L-+2|^a|?8cmdMI@)dl?w$Mo^o%d4MQmt)|46Zd|lIL z_+yGbSY0q$gQJan=|EdBT{RY-A_YCi_@9^S|Qc zM6KL#VrTnpPG{e%C(lsr!bz*1H=JEx(DROSytD2Y8fWS|&aLhz&hnt{5B!-oe^OfV z$)9*$z@K;sq`g0vzx3w}a~r#pKi$2a)Sb9Cr7!)N{dL_1o7Y=8GqS^%x_i-1?#iF^ ziyZY$&t^-z%KPxx27WW|<7wWNX0#H+b9|TgQ{BmLd0+M1@~-ZHMWO}5_JLJIa9W7b zyqQw$=p9)CdbuxnUKbANC}Sm(Djngh^}CqKtJp50~F(v0-e#>LyECcEb%)iw5$}LT0jUXk0~%7X<=0G zZCW%;vVPS9t^7iPLrQ=aPbl=L#TzM5k-Sl0hrCdneDMWEeybKMg_90n?6p`aFdjbf z#rJ4&bf=yxq7e-hcdH>SRd~>+n~Y@2@CA;2*S>(?j~T(~Vao_^0Q-#OCmFGI+SW=) z17>u(uE;Na;cqDZE~%)F3pO-#z z4^lkzN9Pyc7s2H>;CzFhD^@o=vn177GR`|a5*Ovs(LDttLhRg;UP<_$bH=1AxFQEE zNiq9~hZFA2d5JRzRhQP24U0uookGEp1{Yq_YBsatK#j2v|M{v~vI!2BDdN4ct}#xL zxZAve(|(E*m#G)@OwaUui8C}Dz8>nCUe@y`IG@-3HO@YQVDtYF85ao7R9W4-uOvCnM&Jm%>CpeLf*3#(f& z%b)|(BpC6g$*04>4TEjLHw6`o7`7_xm|16C8ZH?a1($%XU=nK|X<)-^0ndp^QZ9oF z&b1YD5WY7;)}1R-@$FF3BBjhU2gOkZ<5b2(g~H%&gn4YnaEX_om!#2D)nlf@fxv20 zH+_6NXVJBoPdV+(Ii7OX+<$-*E3ZQnKI$3o^!()f*Ltc!Pi$a+QFs1{y2GE|VFP=@ z`HG&;IP+bOusR%b#E)}CcR6yW964^;c^w<)IOPaW+H;Q1oN7<~*)Z*Xe~uJirG8QO 
zGv_hy{omz%<9xC1IF&u>OK6mD(o^s1e(9e$oxQ4OdCd9DpTFPd@BMkcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/locidex/example/build_db_mlst_out/blast/protein/protein.pto b/locidex/example/build_db_mlst_out/blast/protein/protein.pto new file mode 100644 index 0000000000000000000000000000000000000000..ad19396e81aff427697a109c3c035ac73cb27f3f GIT binary patch literal 216 zcmXBFg${xM06;;oyHK$M6YTo`U-LHJc6+}dFSG(hN|dQkrAD0wO Date: Tue, 30 Apr 2024 16:36:59 -0500 Subject: [PATCH 41/51] updated tests and and search workflow --- locidex/build.py | 1 - .../blast/nucleotide/nucleotide.nin | Bin 792 -> 792 bytes .../blast/nucleotide/nucleotide.njs | 4 ++-- .../blast/protein/protein.pin | Bin 0 -> 568 bytes .../blast/protein/protein.pjs | 22 ++++++++++++++++++ .../example/build_db_mlst_out/results.json | 8 +++---- tests/test_build.py | 2 +- tests/test_seq_intake.py | 4 ++-- 8 files changed, 31 insertions(+), 10 deletions(-) create mode 100644 locidex/example/build_db_mlst_out/blast/protein/protein.pin create mode 100644 locidex/example/build_db_mlst_out/blast/protein/protein.pjs diff --git a/locidex/build.py b/locidex/build.py index a810409..be01c84 100644 --- a/locidex/build.py +++ b/locidex/build.py @@ -186,7 +186,6 @@ def run(cmd_args=None): db_desc=cmd_args.db_desc, db_author=cmd_args.author, db_date=datetime.now().strftime("%Y/%d/%m"), - ) if not os.path.isfile(input_file): diff --git a/locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.nin b/locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.nin index 4b09f49dafec1cf5d8fd80ca5bc3fb4ef51a6130..b47d2564675d574bd435866fe18c04172f4b2d4b 100644 GIT binary patch delta 53 zcmbQiHiJ!ofq{V)h+)8aqELi{xMM+)g0X>)f{}rdiGqTOm64@FfG-2X#)cV80LCf_ A8UO$Q delta 53 
zcmbQiHiJ!wfq{V)h+)80Pk*97gqWyfL6L&7fsTTafsu)Vf{B%Zi9&$y#)cV80LASI AZU6uP diff --git a/locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.njs b/locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.njs index 2819c06..759cb8c 100644 --- a/locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.njs +++ b/locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.njs @@ -3,10 +3,10 @@ "dbname": "nucleotide", "dbtype": "Nucleotide", "db-version": 5, - "description": "./locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.fasta", + "description": "locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.fasta", "number-of-letters": 25041, "number-of-sequences": 53, - "last-updated": "2024-04-30T16:04:00", + "last-updated": "2024-04-30T16:29:00", "number-of-volumes": 1, "bytes-total": 48248, "bytes-to-cache": 7089, diff --git a/locidex/example/build_db_mlst_out/blast/protein/protein.pin b/locidex/example/build_db_mlst_out/blast/protein/protein.pin new file mode 100644 index 0000000000000000000000000000000000000000..40267383c312cd9301e6ff4b4ba45732ba54b2d0 GIT binary patch literal 568 zcmXxh%PT}-7{~G7xQsE3alZ`4$wH9~gT<(+k%c57$--iq8%@rbi4jZ6g0i42hy|ta z7udRFAxRb%l7$6jX=&#>E>oX*&U?;ze(&>kB;`u7AoKH^$w)jJTk^z~LWz_a^Mn`T zW;7TL2NULeI+$EYd%|XDKJ7`(CDXC^?0;>W;%10|g1-lCsc4vGb#H3U=xlE_yzSmD z!|3Yq`izmG%uZ7Gp7B#Nf5(4U0qQc&e@C(esH`olvxESaaUo@MBfAf)xR!FP=*Jpv zIF=oQ*ub4+Ey6H1@gU`uVhr1Olx&rlzz&|Id>1CMj~B`AMi2*h<)n>>;0SM0VH2it zf)B~jf*JiDXk%;5vswsrRGz$JSzqy3#d7Rj}N9tS-R zdWz_A(&MD3n4S`PO3tKGKdz**0o+UF<9L=TLb#PGNAV?99rB5Ir}|T>A=gE&i`-gr W>&UGmx1L-#x$Y;aVMQ{U@$e0o_*|C& literal 0 HcmV?d00001 diff --git a/locidex/example/build_db_mlst_out/blast/protein/protein.pjs b/locidex/example/build_db_mlst_out/blast/protein/protein.pjs new file mode 100644 index 0000000..f744503 --- /dev/null +++ b/locidex/example/build_db_mlst_out/blast/protein/protein.pjs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "protein", + "dbtype": "Protein", + "db-version": 
5, + "description": "locidex/example/build_db_mlst_out/blast/protein/protein.fasta", + "number-of-letters": 8347, + "number-of-sequences": 53, + "last-updated": "2024-04-30T16:29:00", + "number-of-volumes": 1, + "bytes-total": 50128, + "bytes-to-cache": 8969, + "files": [ + "protein.pdb", + "protein.phr", + "protein.pin", + "protein.pot", + "protein.psq", + "protein.ptf", + "protein.pto" + ] +} diff --git a/locidex/example/build_db_mlst_out/results.json b/locidex/example/build_db_mlst_out/results.json index 67b627d..e8c0c25 100644 --- a/locidex/example/build_db_mlst_out/results.json +++ b/locidex/example/build_db_mlst_out/results.json @@ -1,13 +1,13 @@ { - "analysis_start_time": "2024-04-30 16:04:14", + "analysis_start_time": "2024-04-30 16:29:13", "parameters": { - "input_file": "./locidex/example/build_db_mlst_in/senterica.mlst.txt", - "outdir": "./locidex/example/build_db_mlst_out/", + "input_file": "locidex/example/build_db_mlst_in/senterica.mlst.txt", + "outdir": "locidex/example/build_db_mlst_out/", "name": "Locidex Database", "author": "", "db_ver": "1.0.0", "db_desc": "", "force": true }, - "analysis_end_time": "2024-04-30 16:04:15" + "analysis_end_time": "2024-04-30 16:29:13" } \ No newline at end of file diff --git a/tests/test_build.py b/tests/test_build.py index 0a20aa3..680e89a 100644 --- a/tests/test_build.py +++ b/tests/test_build.py @@ -72,7 +72,7 @@ def get_all_file_paths(dir): ("config.json", ["db_name", "db_version", "db_author", "db_desc", "db_num_seqs", "is_nucl", "is_prot", "nucleotide_db_name", "protein_db_name"], None), - ("results.json", ["input_file", "name", "db_ver", "db_desc", "author", "date", "force"], "parameters") + ("results.json", ["input_file", "name", "db_ver", "db_desc", "author", "force"], "parameters") ]) def test_config_results_json(output_directory,f_name,comp_fields, primary_key): """Verify that config and results files outputs are the same. 
diff --git a/tests/test_seq_intake.py b/tests/test_seq_intake.py index 6b27f66..37865cf 100644 --- a/tests/test_seq_intake.py +++ b/tests/test_seq_intake.py @@ -1,6 +1,6 @@ import os, warnings import locidex.classes.seq_intake -from locidex.constants import BLAST_TABLE_COLS, DB_EXPECTED_FILES, DB_CONFIG_FIELDS +from locidex.constants import BLAST_TABLE_COLS, DB_EXPECTED_FILES, DBConfig from locidex.classes.db import search_db_conf, db_config from collections import Counter @@ -19,7 +19,7 @@ def seq_intake_class_init(input_file, file_type, perform_annotation): #@pytest.mark.skip(reason="no way of currently testing this") def test_seq_store_class(): db_dir = os.path.join(PACKAGE_ROOT, 'example/build_db_mlst_out') - db_database_config = search_db_conf(db_dir, DB_EXPECTED_FILES, DB_CONFIG_FIELDS) + db_database_config = search_db_conf(db_dir, DB_EXPECTED_FILES, DBConfig._keys()) metadata_obj = db_config(db_database_config.meta_file_path, ['meta', 'info']) sample_name = 'NC_003198.1.fasta' seq_obj = locidex.classes.seq_intake.seq_intake(input_file=os.path.join(PACKAGE_ROOT, 'example/search/NC_003198.1.fasta'), From 51cfd6a07619d97236eea1440f29002ded13f709 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 30 Apr 2024 17:08:27 -0500 Subject: [PATCH 42/51] updated test data for workflows --- .../manifest_selection/dbs/manifest.json | 19 + .../one_db/blast/nucleotide/nucleotide.fasta | 106 ++ .../one_db/blast/nucleotide/nucleotide.ndb | Bin 0 -> 20480 bytes .../one_db/blast/nucleotide/nucleotide.nhr | Bin 0 -> 3435 bytes .../one_db/blast/nucleotide/nucleotide.nin | Bin 0 -> 792 bytes .../one_db/blast/nucleotide/nucleotide.njs | 22 + .../one_db/blast/nucleotide/nucleotide.not | Bin 0 -> 644 bytes .../one_db/blast/nucleotide/nucleotide.nsq | Bin 0 -> 6297 bytes .../one_db/blast/nucleotide/nucleotide.ntf | Bin 0 -> 16384 bytes .../one_db/blast/nucleotide/nucleotide.nto | Bin 0 -> 216 bytes .../dbs/one_db/blast/protein/protein.fasta | 106 ++ 
.../dbs/one_db/blast/protein/protein.pdb | Bin 0 -> 20480 bytes .../dbs/one_db/blast/protein/protein.phr | Bin 0 -> 3435 bytes .../dbs/one_db/blast/protein/protein.pin | Bin 0 -> 568 bytes .../dbs/one_db/blast/protein/protein.pjs | 22 + .../dbs/one_db/blast/protein/protein.pot | Bin 0 -> 644 bytes .../dbs/one_db/blast/protein/protein.psq | Bin 0 -> 8401 bytes .../dbs/one_db/blast/protein/protein.ptf | Bin 0 -> 16384 bytes .../dbs/one_db/blast/protein/protein.pto | Bin 0 -> 216 bytes .../manifest_selection/dbs/one_db/config.json | 12 + .../manifest_selection/dbs/one_db/meta.json | 1181 +++++++++++++++++ .../dbs/one_db/results.json | 13 + locidex/extract.py | 4 +- locidex/manifest.py | 4 +- locidex/search.py | 4 +- tests/test_workflows.yml | 5 +- 26 files changed, 1492 insertions(+), 6 deletions(-) create mode 100644 locidex/example/manifest_selection/dbs/manifest.json create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.fasta create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.ndb create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.nhr create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.nin create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.njs create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.not create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.nsq create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.ntf create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.nto create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.fasta create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pdb create mode 100644 
locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.phr create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pin create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pjs create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pot create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.psq create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.ptf create mode 100644 locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pto create mode 100644 locidex/example/manifest_selection/dbs/one_db/config.json create mode 100644 locidex/example/manifest_selection/dbs/one_db/meta.json create mode 100644 locidex/example/manifest_selection/dbs/one_db/results.json diff --git a/locidex/example/manifest_selection/dbs/manifest.json b/locidex/example/manifest_selection/dbs/manifest.json new file mode 100644 index 0000000..d4ba6b2 --- /dev/null +++ b/locidex/example/manifest_selection/dbs/manifest.json @@ -0,0 +1,19 @@ +{ + "Locidex Database": [ + { + "path": "one_db", + "config": { + "db_name": "Locidex Database", + "db_version": "1.0.0", + "db_date": "2024/30/04", + "db_author": "tester", + "db_desc": "Test database for CI", + "db_num_seqs": 53, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + } + } + ] +} \ No newline at end of file diff --git a/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.fasta b/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.fasta new file mode 100644 index 0000000..a03cb89 --- /dev/null +++ b/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.fasta @@ -0,0 +1,106 @@ +>0 
+AAATTCCGTCCCGGACATGCGGACTACACCTATCACCAAAAATACGGTGTGCGAGATTACCGTGGCGGCGGCCGTTCATCGGCACGTGAAACCGCCATGCGTGTTGCTGCGGGAGCGATTGCCAAAAAATATCTGCAGCAAGAGTTTGGCATTGAAGTGCGTGCTTACTTGTCGCAAATGGGGGATGTCGCGATTGATAAAGTGGATTGGAATGAGATTGAAAACAACGATTTCTTCTGTCCTGATGTCGATAAAGTGGCTGCGTTTGACGAGCTGATCCGCGAGCTGAAAAAAGAAGGCGATTCGATCGGCGCGAAAATCCAAGTGGTCGCTACAGGCGTGCCGGTTGGACTGGGTGAGCCTGTGTTTGATCGCTTAGATGCGGATATTGCCCATGCCTTGATGAGCATCAACGCCGTGAAAGGAGTCGAGATTGGTGATGGCTTTGATGTGGTGCGCCAAAAAGGCAGCCAACACCGTGACCCGCTCACTCCACAAGGT +>1 +GTTTTCCGCCCGGGCCATGCCGACTATACCTACGAGCAGAAATACGGTCTGCGCGATTACCGTGGCGGCGGTCGTTCTTCCGCCCGTGAAACGGCGATGCGCGTCGCGGCTGGCGCGATTGCTAAAAAATATCTGGCGGAGAAACACGGCATCGTCATTCAGGGGTGTCTGACCCAGATGGGCGATATTCCGCTTGAAATCAAAGACTGGCAGCAGGTTGAACAAAACCCGTTTTTCTGTCCTGATCCAGATAAAATCGACGCGCTGGATGAACTGATGCGCGCCCTGAAGAAAGAGGGCGATTCGATTGGGGCAAAAGTGACCGTCGTGGCAAACGGCGTTCCGGCCGGGCTTGGCGAACCGGTCTTTGACCGTCTGGATGCGGACATCGCTCATGCGCTGATGAGCATCAACGCGGTAAAAGGCGTGGAGATTGGCGATGGGTTTGATGTGGTCGCGTTGCGAGGCAGCCAGAATCGCGATGAAATTACCAAAGAGGGC +>2 +GTTTTCCGTCCAGGACACGCTGACTATACCTATGAGCAGAAATATGGCCTGCGCGACTACCGTGGCGGCGGACGTTCATCCGCGCGTGAAACGGCGATGCGCGTTGCGGCTGGCGCGATTGCCAAAAAATATCTGGCGGAAAAATTCGGCGTTGAAATTCGCGGCTGTCTGACGCAGATGGGGGATATTCCGCTGGAGATCAAAGACTGGTCTCAGGTGGAGCTTAACCCGTTCTTTTGTCCAGACCCGGATAAAATCGAAGTGCTGGACGAACTGATGCGCGGGCTGAAGAAAGAGGGCGACTCCATCGGGGCAAAAGTGACCGTTGTTGCAAGCGGCGTACCGGCGGGTCTCGGCGAACCTGTATTCGACCGTCTGGATGCCGACATCGCCCATGCGCTGATGAGCATTAACGCCGTTAAGGGCGTTGAGATTGGCGACGGTTTTGACGTTGTTGCGCTGCGCGGCAGTCAGAACCGCGATGAGATCACCAAAGAAGGT +>3 
+GTTTTCCGCCCAGGGCATGCTGATTATACCTATGAACAAAAATATGGTTTGCGTGATTATCGTGGTGGTGGACGTTCTTCTGCTCGTGAAACGGCAATGCGTGTCGCCGCAGGTGCGATTGCTAAAAAATATCTAAAAGAGAAATTAGGCATCGAAGTTCGAGGATATCTTTCTCAGCTAGGACCTATTACATGTGATCTTGTTGATTGGTCTATTGTTGAAAGCAATCCATTTTTCTGTCCTGATCCTTCACGTTTAGATGCGCTTGATGAATACATGCGTGCACTTAAAAAAGAAGGTAATTCTATTGGTGCAAAAGTCACTGTGGTTGCACAGGGTGTACCTGCTGGATTTGGTGAACCTGTCTTTGATCGATTAGATGCTGATTTAGCGCATGCTTTGATGAGTATCAATGCTGTCAAAGGTATAGAAATTGGTGATGGATTTGGTGTTGTAACATTAAAAGGTACAGAAAACCGAGATGAAATCACTAAAAAGGGA +>4 +GTTTTCCGTCCAGGCCATGCCGATTACACCTACGAACAAAAATACGGTCTGCGCGATTATCGCGGCGGCGGGCGCTCTTCCGCCCGCGAAACCGCCATGCGCGTGGCGGCAGGGGCGATTGCAAAAAAATATCTCGCCGAGAAATTTGGCATTGAGATTCGCGGCTGCCTGACCCAGATGGGTGACATTCCGCTGGAAATCAAAGACTGGTCGCAGGTCGAGCAAAATCCGTTTTTCTGCCCGGACCCGGACAAAATCGACGCGTTAGATGAACTGATGCGCGCGCTGAAAAAAGAGGGCGACTCCATCGGCGCGAAAGTCACCGTTGTTGCCAGTGGCGTCCCCGCCGGACTTGGCGAGCCGGTCTTTGACCGCCTGGATGCCGACATCGCCCATGCGCTGATGAGCATCAACGCGGTGAAAGGCGTAGAAATTGGTGATGGTTTTGACGTGGTGGCGCTGCGTGGCAGCCAGAACCGCGACGAAATCACCAAAGACGGT +>5 +GTTTTCCGTCCTGGTCACGCCGACTATACCTACGAACAAAAATATGGCTTTCGCGACTATCGCGGCGGCGGGCGTTCTTCCGCGCGTGAAACCGCGATGCGCGTGGCGGCAGGGGCAATTGCCAAAAAATATCTCCAGCAGAAATTCGGCATCGTTATCCGCGGCTGTCTGTCCCAGATGGGCGACATTCCGCTGGCAATCAAAGACTGGGATCAGGTAGAGCTCAACCCGTTCTTCTGCGCCGATGCCGACAAGCTGGACGCGCTGGATGAGCTGATGCGTGGCCTGAAAAAAGAGGGCGACTCCATTGGTGCGAAAGTCACCGTGGTGGCCGACGGCGTGCCGGCTGGCTGGGGCGAGCCGGTATTTGACCGCCTTGACGCCGACATCGCCCACGCGCTGATGAGCATCAACGCGGTGAAAGGCGTCGAAATCGGCGACGGTTTTGACGTGGTCAAGCTTCGCGGCAGCCAGAACCGCGACGAAATCACGAAGGCGGGT +>6 
+GTGTTCCGTCCGGGGCACGCGGATTACACCTACGAACAAAAATACGGCCTGCGCGACTATCGCGGCGGCGGGCGTTCATCCGCCCGTGAAACCGCCATGCGCGTCGCGGCAGGCGCTATCGCCAAAAAATATCTGGCGCAGAAATTCGGCGTGGTGATTCGCGGCTGCCTGACCCAGATGGGTGATATTCCGCTGGAAATCAAAGACTGGGATCAGGTAGAGCAAAACCCGTTCTTCTGCCCGGACCCGGATAAAATCGAGGCGCTGGATGAGCTGATGCGCGCTCTGAAAAAAGAGGGCGATTCCATCGGCGCGAAAGTCACCGTGGTGGCCGACAGCGTGCCCGCCGGGCTTGGCGAGCCGGTATTTGACCGCCTGGACGCCGATATCGCCCACGCGCTGATGAGCATTAACGCCGTGAAGGGCGTGGAAATCGGCGACGGTTTCGGCGTGGTGCAACTGCGCGGCAGCCAGAACCGCGACGAAATCACCACTGCCGGT +>7 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAGCCAGGCGCCACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGCGGCCTGCCGGAGGGCGCGGAGATTGCCGTTCAGTTGGAAGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGCCGCTTCTCGCTGTCTACGCTGCCTGCCGCCGATTTCCCGAATCTTGACGACTGGCAAAGCGAAGTTGAATTTACGCTGCCGCAGGCCACGATGAAGCGCCTGATTGAAGCGACCCAGTTTTCGATGGCCCATCAGGATGTGCGCTACTACTTAAACGGTATGCTGTTTGAAACGGAAGGTAGCGAACTGCGCACTGTTGCGACCGACGGCCACCGTCTGGCGGTGTGCTCAATGCCGCTGGAGGCGTCTTTACCTAGCCACTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGTGGCGAAAACCCGCTGCGCGTGCAG +>8 +ATGGAGATGGTCGCGCGCGTTACGCTTTCTCAGCCGCATGAACCCGGCGCTACTACCGTGCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCCTGCCGGAAGGGGCGGAAATCGCCGTTCAGCTGGAGGGCGATCGGATGCTGGTGCGTTCTGGCCGTAGTCGCTTTTCGCTGTCTACCTTACCGGCAGCAGACTTCCCGAATCTGGATGACTGGCAAAGCGAAGTGGAATTCACGCTGCCTCAGGCGACGATGAAACGCTTGATTGAGGCCACCCAGTTTTCGATGGCCCATCAGGACGTGCGCTACTACCTGAACGGTATGTTGTTTGAAACGGAAGGAAGCGAACTGCGCACCGTCGCGACCGACGGCCACCGTCTGGCGGTCTGTTCAATGCCGCTGGAGGCCTCTTTACCGAGCCATTCAGTGATCGTACCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTTGACGGCGGTGAAAATCCACTGCGTGTACAG +>9 
+ATGGAAATGGTGGCGCGCGTTGCGTTGATTCAGCCTCATGAACCAGGCGCAACTACCGTCCCGGCGCGGAAATTCTTTGATATCTGCCGTGGCTTGCCGGAAGGGGCTGAAATTGCCGTCCAGCTGGAAGGCGATCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTCTCGCTTTCCACGCTGCCTGCCGCCGATTTCCCTAATCTGGATGACTGGCAGAGCGAAGTCGAATTCACCCTGCCGCAGGCAACGATGAAGCGCCTGATTGAAGCCACCCAGTTCTCAATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAGACTGAAGGTGAAGAGTTGCGTACCGTCGCGACCGACGGTCACCGTCTGGCGGTCTGCTCTATGCCGGTCGGGCAATCTCTGCCTAACCATTCGGTGATTGTGCCGCGTAAAGGCGTGATTGAGCTGATGCGTATGCTCGACGGCGGCGAAACCCCGCTGCGCGTACAG +>10 +ATGGAGATGGTGGCGCGCGTGGCGCTGATCCAGCCTCATGAACCTGGTGCGACCACCGTTCCGGCGCGTAAATTCTTCGATATTTGCCGTGGATTACCAGAAGGGGCGGAAATTGCCGTTCAACTGGAAGGCGACCGTATGCTGGTGCGTTCTGGCCGCAGCCGTTTCTCGCTGTCTACGCTGCCTGCCGCCGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTCGAATTCACCCTGCCACAGGCGACAATGAAGCGCCTGATTGAAGCCACGCAGTTTTCGATGGCGCATCAGGACGTGCGTTACTACTTAAACGGCATGCTGTTTGAAACCGAAGGGGAAGAGTTGCGTACCGTGGCGACCGACGGTCACCGCCTGGCGGTCTGTTCAATGCCTGTCGGTCAGCCGTTGCCTAGCCATTCGGTGATCGTACCGCGTAAAGGTGTGATTGAACTGATGCGTATGCTCGACGGCGGCGATAACCCGCTGCGCGTGCAG +>11 +ATGGAAATGGTGGCACGCGTTGCGCTGGTTCAGCCGCACGAACCAGGGGCGACGACCGTTCCAGCGCGCAAATTCTTTGATATCTGCCGTGGTCTGCCTGAAGGCGCGGAAATTGCCGTGCAGCTGGAAGGTGAGCGGATGCTGGTGCGCTCCGGGCGTAGCCGTTTTTCGCTGTCTACCCTGCCAGCGGCGGATTTCCCGAATCTCGATGACTGGCAGAGCGAAGTCGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCGACCCAGTTTTCTATGGCGCATCAGGACGTTCGCTATTACTTAAACGGTATGCTGTTTGAAACCGAAGGTGAAGAACTGCGCACCGTGGCGACCGACGGCCACCGTCTGGCAGTCTGTTCAATGCCAATTGGTCAATCTTTGCCAAGCCATTCGGTGATCGTGCCGCGTAAAGGCGTGATTGAACTGATGCGTATGCTCGACGGCGGCGACAATCCGCTGCGCGTGCAG +>12 
+ATGGAAATGATCGCGCGCGTTACGCTGACTCAGCCGCACGACGCGGGCGCGACCACGGTTCCGGCACGTAAATTCTTTGATATTTGCCGTGGGCTGCCGGAAGGCGCTGAAATCGCAGTGCAGCTGGAGGGCGACCGCATGCTGGTGCGCTCTGGCCGCAGCCGTTTCTCCCTCTCCACGTTGCCCGCTGCGGACTTCCCGAACCTGGATGACTGGCAGAGCGAAGTTGAATTTACCCTGCCGCAGGCGACGATGAAGCGTCTGATTGAAGCCACGCAGTTCTCCATGGCGCATCAGGACGTTCGTTACTACTTAAACGGCATGCTGTTCGAAACCGAAGGTGAAGAGCTGCGTACCGTGGCGACCGACGGTCACCGTCTGGCGGTTTGTTCCATGCCGATTGGCGATTCACTGCCAAACCATTCGGTGATCGTACCGCGTAAAGGCGTAATTGAACTGATGCGTATGCTCGACGGCGGTGAAACGCCGCTGCGCGTGCAG +>13 +ATGGAGATGATCGCGCGTGTGGCGCTGTCGCTACCGCACCAGGCGGGCGCGACCACCGTGCCGGCGCGCAAATTCTTCGATATCTGCCGTGGCTTGCCGGAAGGGGCGGAAATCGCCGTTACGCTGGAAGGCGACAGAATGCTGGTGCGCTCCGGGCGCAGCCGCTTCTCGCTGTCTACGTTACCGGCGGCAGACTTCCCGAATCTGGACGACTGGCAGAGCGAAGTGGAGTTCACGCTCCCGCAGGCCACCATGAAGCGCCTGATCGAAGCGACCCAGTTCTCCATGGCCCATCAGGACGTGCGGTATTACCTGAACGGGATGCTGTTTGAAACCGAAGGCGAAGAGCTGCGCACCGTGGCGACTGACGGCCACCGTCTGGCGGTATGCGCGATGCCGGTAGGCCAACCGCTGCCAAACCATTCGGTGATTGTACCGCGTAAAGGCGTGCTGGAGCTGATGCGTATGCTCGATGGCGGCGACAGCCCGCTGCGCATTCAG +>14 +TCGGCGCTGACGGAAAACGATCTGGTCTTCGCCCTCTCGCAGCACGCCGTCACCTTTGCAGATGCCGAGCTTCAGCAACAAGGGAAAAGCTGGCCCTCCCTTCCGCGTTATTTTGCCATTGGTCGCACAACGGCGCTGGCGCTGCATACCGTTAGCGGTTTCAATATTCACTACCCTCTGGATCGGGAAATTAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGCGCGCTTATATTACGCGGCAATGGTGGCCGTGAGCTGATAGGTGAAACCCTGACAGCACGCGGAGCTGATGTCGATTTTTGTGAATGTTATCAACGCAGTGCAAAATATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCTCGTGGTGTGACCACGGTGGTTGTCACCAGCGGAGAGATGCTACAA +>15 +GCGGCGCTGGGGGAGAGCGATCTGTTGTTTGCCCTCTCGCAACACGCGGTTGCTTTTGCCCAATCACAGCTGCATCAGCAAGATCGTAAATGGCCCCGACTACCTACTTATTTCGCCATTGGACGCACCACCGCACTGGCGCTACATACCGTAAGCGGACAGAAGATTCTCTACCCGCAGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGTGCGCTGATATTACGTGGCAATGGCGGTCGTGAGCTAATTGGGGATACCCTGACGGCGCGCGGTGCTGAGGTCACTTTTTGTGAATGTTATCAACGATGCGCAATCCATTACGATGGTGCAGAAGAAGCGATGCGCTGGCAATCCCGCGAGGTGACGACGGTCGTTGTTACCAGCGGTGAAATGTTGCAG +>16 
+GCGACGTTGACGGAAAACGATCTGGTTTTTGCCCTTTCACAGCACGCCGTCGCCTTTGCCCACGCCCAACTCCAGCGAGATGGTCGAAACTGGCCTGCGTCGCCGCGCTATTTCGCGATTGGTCGCACCACGGCGCTCGCCCTTCATACCGTTAGCGGGTTCGATATTCGTTATCCATTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGCAAACGCGCGCTGATTTTGCGTGGCAATGGCGGTCGCGGTCGCGAACTGCTGGGCGAAACCCTGACAGCTCGCGGAGCCGAAGTCAGTTTTTGTGAATGTTATCAACGAAGTGCGAAACATTACGATGGCGCAGAAGAGGCGATGCGCTGGCACACTCGCGGCGTAACGACGCTTGTTGTCACCAGCGGCGAGATGTTGCAA +>17 +GCGGCGCTCACGGACAACGATCTGGTGTTCGCCCTCTCGCAACACGCCGTCGCCTTTGCCCACGCCCAACTGCAACAGCAGGAGCTGGACTGGCCTGTGCAACCACGCTACTTCGCCATCGGGCGCACAACGGCGCTGGCGCTGCATACCGTTAACGGATGCGATATTCGCTATCCTCTGGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAAATATTGCGGGAAAACGAGCGCTTATTTTACGGGGCAACGGCGGGCGTGAACTGTTAGGCAAAACCCTCACAGAACGCGGCGCTGAAGTCACCTTTTGTGAATGTTATCAACGCAGTGCAAAACATTACGATGGCGCGGAAGAGGCGATGCGCTGGCACTCTCGCGGCGTGACGACGATTGTTGTCACCAGCGGCGAAATGCTGCAA +>18 +GAAACACTTGGCGATAACGATCTGCTCTTTGCACTTTCTCAACATGCAGTGTCATTCGCCCATGCGCAGTTGCAACAGCAGGGGCTAAACTGGCCATCACTTCCGCATTATTTCGCTATTGGCCGTACTACCGCTCTCGCCCTGCACACCGTAAGCGGACATAAGATTCGCTATCCACAAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCGGAATTACAAAGTATTGCGGGAAAACGCGCACTTATTTTGCGCGGTAACGGCGGCCGTGAATTGATCGGTCAGACGCTGACATCACGTGGTGCCGACGTTACTTTTTGTGAATGTTATCAACGCAGTGCGAAGCATTACGATGGTGCGGAAGAAGCTATGCGCTGGCAGTCTCGCGGCGTAACAACCGTCGTTGTAACCAGCGGTGAAATGCTGCAA +>19 +CGTCTCTTGCAGGAAGGCGATCTGCTCTTTGCGCTGTCGCAGCATGCCGTGGAGTTTGCCCATGCGCAGCTGCAACAGCATGCCGTTAGCTGGCCTCACGCCCCCCGCTATTTCGCCATCGGGCGCACCACGGCGCTGGCCTTACATACCGCGAGCGGAATCGATGTTCGTTACCCGTTAGATCGGGAAATCAGCGAAGTCTTGCTACAATTACCTGAATTACAAACCATTGCCGGAAAGCGCGCGCTCATTTTGCGCGGCAACGGTGGCCGCGAACTGCTGGGCGAAACGCTGCGCGAACGCGGCGCAGACGTGACGTTTGTGGAGTGCTATCAGCGCTGTGCGAAACACTATGATGGCGCGGAAGAAGCAATGCGCTGGCACGCCCGCGGTATTAATACGCTGGTGGTCACCAGCGGTGAAATGTTACAA +>20 
+ATTGCGGGATGCCAGAAGGTGGTTCTGTGCTCGCCGCCACCCATCGCTGATGAAATCCTCTATGCGGCGCAACTGTGTGGCGTGCAGGAAATCTTTAACGTCGGCGGCGCGCAGGCGATTGCCGCTCTGGCCTTCGGCAGCGAGTCCGTACCGAAAGTGGATAAAATTTTTGGCCCCGGCAACGCCTTTGTAACCGAAGCCAAGCGTCAGGTCAGCCAGCGTCTCGACGGCGCGGCTATCGATATGCCAGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCAACACCGGATTTCGTCGCTTCTGACCTGCTCTCCCAGGCTGAGCACGGCCCGGATTCCCAGGTGATCCTGCTGACGCCGGATGCTGACATTGCCCGCAAGGTGGCGGAGGCGGTAGAACGTCAACTGGCGGAACTGCCGCGCGCGGGCACCGCCCGGCAGGCCCTGAGCGCCAGTCGTCTGATTGTGACCAAAGATTTAGCGCAGTGCGTC +>21 +ATTGCCGGATGCAAAAAAGTGGTGTTGTGCTCGCCACCGCCTATCGCGGATGAAATCCTTTACGCTGCGCAGCTGTGCGGCGTGCAGGAAATCTTCAACGTCGGCGGCGCCCAGGCCATTGCCGCTCTGGCGTTCGGCAGCGAATCCGTGCCAAAAGTGGACAAAATTTTTGGCCCCGGCAACGCGTTTGTCACCGAGGCGAAACGCCAGGTCAGCCAGCGTCTCGACGGCGCGGCAATTGATATGCCTGCCGGCCCTTCTGAAGTGCTGGTGATCGCCGACAGCGGCGCCACGCCAGATTTCGTGGCGTCTGACCTGCTCTCTCAGGCGGAACACGGCCCGGATTCTCAGGTCATCCTGCTGACCCCGGATGCCGGTATTGCGCAGAACGTCGCAGAGGCCGTCGAACGCCAGTTAGCGGAGTTACCGCGTGCAGAAACGGCGCGTCAGGCATTAAGCGCCAGCCGTCTGATCGTGACGAAAGACTTAGCCCAGTGCGTC +>22 +ATTGCAGGCTGTAAAAAAGTGGTGTTGTGCTCTCCCCCACCTATCGCCGATGAAATTCTGTATGCTGCGCAGCTCTGCGGCGTACAGGATGTGTTTAACGTTGGGGGCGCACAAGCTATTGCCGCGCTGGCATTTGGCAGTGAATCCGTGCCGAAAGTGGACAAAATTTTTGGCCCCGGTAATGCCTTTGTGACCGAAGCCAAACGTCAGGTGAGTCAGCGTCTGGACGGCGCCGCCATCGATATGCCAGCAGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCCACGCCGGATTTCGTTGCCTCTGACTTACTCTCGCAGGCCGAACACGGCCCCGATTCCCAAGTGATCCTGCTGACGCCGGATGCCGGTATGGCCAGCCGGGTTGCTGAAGCAGTAGAACGCCAGCTTGCAGCGCTGCCACGCGCTGAAACCGCGCGGCAGGCGTTAAGCGCCAGTCGTCTGATTGTCACCCGCTCCCTTGCGCAATGCGTA +>23 
+ATTGCGGGCTGTAAAAAAGTGGTGCTGTGCTCACCGCCGCCGATTGCCGATGAGATCCTTTACGCGGCGCAGCTGTGCGGTGTGCAGGACGTGTTTAACGTCGGCGGCGCACAGGCCATTGCCGCGCTGGCGTTTGGTACAGAATCCGTGCCGAAAGTGGACAAAATCTTCGGGCCAGGTAACGCCTTTGTCACCGAGGCAAAACGTCAGGTGAGCCAGCGTCTGGACGGTGCGGCGATCGATATGCCCGCAGGCCCGTCGGAAGTGCTGGTGATTGCTGACAGCGGCGCAACGCCGGATTTCGTGGCTTCTGATTTGCTCTCCCAGGCTGAACACGGCCCGGACTCTCAGGTGATTTTACTGACGCCCGCTGCTGATATGGCGCGTCGCGTAGCCGAAGCTGTCGAACGCCAGCTGGCAGAACTGCCGCGAGCTGAAACCGCCCGCCAGGCACTGAACGCCAGCCGCCTGATCGTGACTAAAGATTTAGCGCAGTGCGTG +>24 +ATTGCCGGTTGTCAGAAGGTGGTGCTCTGCTCTCCTCCACCGATCGCCGATGAGATCCTGTACGCGGCGAAGCTGTGCGGCGTGCAGGCGATCTATAAAGTGGGCGGTGCGCAGGCGATTTCTGCCCTGGCGTTCGGAACAGTATCCATTCCTAAGGTCGACAAAATCTTTGGCCCGGGCAATGCCTACGTGACCGAGGCGAAGCGCCAGGTCAGCCAGCGTCTGGACGGCGCGGCGATTGATATGCCTGCCGGTCCGTCTGAAGTGCTGGTGATTGCCGACAGCGGCGCTACACCGGATTTCGTGGCCTCTGACCTGCTCTCGCAGGCCGAGCACGGCCCTGACTCGCAGGTGATTTTACTGACGCCAGATGCCGACATGGCAAAACGCGTGGGCGACGCCGTTGAGCGTCAGCTGGCTGACCTGCCGCGGGCGGAAACGGCGCGTCAGGCGCTATCCGCCAGCCGCCTGATTGTGGCCCGCGATCTTGACCAGTGCATC +>25 +ATCGCCGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTACGCCGCGCAACTCTGTGGCGTGAAAGAAGTGTTTAACGTGGGTGGCGCACAGGCCATTGCCGCGCTGGCGCTGGGCACGGAGTCTATTCCAAAAGTCGATAAAATCTTTGGGCCGGGCAACGCCTATGTGACCGAAGCCAAGCGCCAGGTCAGCCAGCGTCTTGACGGCGCGGCAATCGATATGCCCGCCGGACCGTCCGAAGTATTGGTTATCGCCGACAGCGGCGCAACGCCGGATTTTGTCGCCTCCGACCTGCTTTCTCAGGCCGAGCACGGCCCAGACTCGCAGGTGATCCTGCTGACGCCGGACGCTAAGCTTGCCGAGGGCGTGGCCGAAGCCGTTGAACGCCAGCTCGCCGAGCTGTCCCGCGCCGACACCGCGCGTCAGGCGCTCTCCGCCAGCCGTTTAATCGTAGCGAAAGATCTGGCGCAGTGCGTG +>26 
+ATCGCGGGCTGTAAAAAAGTGGTGCTGTGCTCGCCGCCGCCGATTGCCGATGAAATCCTCTATGCGGCGCGTTTGTGCGGGGTACAGCAGGTCTATCAGGTGGGCGGCGCTCAGGCCATCGCGGCGCTGGCGTTTGGCACCGAGACCGTACCCAAAGTGGACAAAATCTTCGGGCCGGGCAATGCGTTTGTCACCGAAGCCAAACGTCAGGTCAGCCAGCGGCTGGATGGCGCGGCGATTGATATGCCTGCCGGGCCGTCTGAAGTGCTGGTGATCGCCGATAGCGGCGCGACCACGGATTTCGTGGCCTCGGATTTGCTGTCCCAGGCGGAACACGGCCCGGATTCGCAGGTGATCCTGCTGACACCGGACAGCGCCATGGCGCAGGCGGTGGCCGACGCGGTTGAGCGTCAACTCGCCGAACTGCCGCGCGCGGAAACAGCTCGCCAGGCGCTGGCGGAAAGCCGCCTGATTGTGGCGCGCGATTTAGCGCAGTGCGTG +>27 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAATTTTTGACATTCTGGATATTCCGCACCATGTCGAAGTGGTTTCTGCTCACCGTACCCCCGATAAACTGTTCAGCTTTGCCGAAAATGCTGAAGAAAACGGCTTTCAGGTAATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCAGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTTGGCGTTCCGGTACAAAGCGCTGCGCTAAGCGGTGTGGACAGTCTCTATTCTATTGTACAGATGCCGCGCGGTATTCCGGTTGGCACACTGGCCATCGGCAAAGCTGGCGCCGCTAACGCGGCGCTGCTGGCGGCGCAAATTCTGGCCACCCACGATAACGCACTGCATCAGCGCCTTCGCGAC +>28 +AGCGACTGGACTACCATGCAATTCGCCGCCGAAATTTTTGAAATTCTGGATGTTCCGCACCATGTAGAAGTGGTTTCCGCCCATCGAACCCCTGATAAACTGTTCAGCTTCGCCGAAACGGCGGAAGAGAACGGATATCACGTGATTATTGCCGGCGCGGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACATTGGTGCCGGTACTCGGCGTTCCGGTACAAAGCGCAGCATTAAGCGGTGTGGATAGCCTTTACTCCATTGTTCAGATGCCGCGTGGCATTCCGGTGGGTACACTGGCTATCGGCAAAGCCGGGGCTGCGAACGCCGCGCTGCTGGCAGCGCAAATTTTGGCCACACACGATAATGCGCTGCACCAGCGCCTGAGCAAC +>29 +AGCGACTGGGCTACCATGCAGTTCGCCGCAGAAATCCTCGATATTCTGAACGTACCTCACCATGTTGAAGTGGTTTCCGCCCACCGCACGCCCGATAAACTGTTCAGCTTCGCCGAAGACGCCGAAAGCAACGGTTATCAGGTGATTATTGCCGGTGCCGGCGGCGCTGCGCACTTACCCGGAATGATTGCCGCCAAAACGCTGGTCCCGGTATTAGGTGTACCCGTCCAGAGCGCCGCATTAAGCGGTGTCGATAGCCTCTACTCCATCGTGCAGATGCCGCGCGGCATTCCGGTCGGTACGCTGGCGATCGGTAAAGCCGGTGCCGCTAACGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>30 
+AGCGACTGGGCTACCATGCAGTTCGCCGTCGAAATCTTCGAAATCCTGAATGTCCCGCACCACGTTGAAGTGGTTTCTGCTCACCGCACCCCCGATAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTATCAGGTGATTATTGCGGGCGCAGGCGGCGCAGCGCACCTGCCAGGCATGATTGCCGCCAAAACGCTGGTGCCGGTGCTGGGCGTGCCAGTACAGAGCGCCGCACTGAGCGGTGTCGATAGCCTCTACTCCATCGTACAAATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATTGGTAAAGCTGGCGCGGCAAACGCGGCATTACTGGCAGCACAAATTCTCGCGACTCACGATAAAGAGCTACACCAGCGTCTGAATGGC +>31 +AGCGACTGGGCTACCATGCAGTTTGCCGCCGAAATCTTCGATATCCTGAACGTTCCACACCACGTTGAAGTGGTTTCCGCACACCGCACCCCCGATAAGCTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAAGGGTTATCAGGTGATTATTGCCGGTGCTGGCGGCGCGGCGCATCTGCCGGGAATGATTGCGGCAAAAACGCTGGTGCCGGTACTGGGCGTGCCGGTGCAAAGCGCTGCGCTGAGCGGCGTGGACAGCCTCTACTCTATCGTCCAGATGCCGCGCGGCATTCCGGTCGGCACGCTGGCGATCGGCAAAGCGGGCGCGGCGAACGCGGCGTTACTGGCAGCGCAAATTCTGGCGACACACGATAAAGACCTGCGCCAACGTCTGGCGGAC +>32 +AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTCGAAATGCTGGACGTTCCGCACCATGTTGAAGTCGTCTCAGCCCACCGTACCCCTGATAAACTGTTCAGCTTCGCCGAAAGCGCTGAAGAAAACGGTTATCAGGTTATTATTGCGGGTGCTGGCGGTGCAGCGCATCTGCCGGGCATGATTGCAGCGAAAACGCTGGTCCCCGTGTTAGGCGTTCCGGTACAAAGCGCAGCGTTGAGCGGCGTAGATAGCCTCTACTCAATCGTGCAGATGCCACGCGGCATCCCCGTGGGTACGCTGGCGATTGGGAAAGCGGGTGCGGCAAATGCGGCCCTGCTGGCAGCACAAATTCTGGCAACACACGACAAAGCATTACATCAGCGTCTGAGCGAC +>33 +AGTGACTGGGCAACCATGTCTCATGCCGCAGATGTATTAGATACACTACAAATTCCTTACCATGTTGAGATTGTCTCTGCACACCGAACCCCTGATAAGTTATTTAGTTTTGCTGAAAAAGCAAAAAGTAATGGCTTTGATGTCATTATTGCTGGTGCAGGAGGAGCTGCCCATTTACCAGGAATGCTTGCAGCTAAAACGTTAGTACCCGTATTTGGTGTTCCTGTTCAAAGTGCGACATTAAGCGGTGTTGATAGCCTCTATTCAATCGTACAAATGCCAAAAGGTATCCCTGTAGGAACCTTAGCGATTGGTAAAGCAGGGGCTGCCAATGCGGCTTTATTAGCGGCTCAAGTTTTAGCGTTACATTCTCCTGCTATTTTAGATGCATTGACTGCA +>34 
+AGCGACTGGGCTACCATGCAGTTCGCCGCCGAAATCTTTGAAATCCTGAATGTTCCGCACCACGTCGAAGTGGTTTCCGCACACCGTACCCCGGACAAACTGTTCAGCTTCGCCGAAAGCGCCGAAGAGAACGGTTACGAGGTGATCATTGCCGGTGCGGGCGGCGCAGCACATCTGCCGGGCATGATTGCCGCCAAAACGCTGGTGCCGGTACTGGGTGTTCCCGTGCAAAGCGCCGCGTTAAGCGGGGTGGATAGCCTTTACTCTATTGTCCAGATGCCGCGCGGTATTCCTGTCGGTACCCTGGCGATTGGTAAAGCAGGTGCGGCAAATGCCGCCCTGCTGGCCGCGCAGATCCTGGCGACGCATGATAAAGATTTGCACCAGCGTCTGGCGGAG +>35 +AGCGACTGGGCTACCATGCAATTCGCCGCCGAAACGGCGGAAGAGAACGGATATCAAGTGATTATTGCCGGCGCGGGCGGCGCGGCGCACCTGCCGGGAATGATTGCGGCAAAAACGCTGGTCCCGGTACTCGGCGTGCCGGTACAAAGCGCTGCGCTAAGCGGCGTGGATAGCCTTTACTCCATTGTGCAGATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATCGGTAAAGCCGGTGCGGCTAATGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCCGAC +>36 +AGCGACTGGGCCACCATGCAGCATGCCGCTGAAATTCTTGATGCCCTTGATGTTCCTTACCATGTTGAAGTGGTTTCCGCTCACCGCACGCCTGATAAGCTTTTCAGCTTTGCTGAATCCGCGCAGCACAACGGTTATCAGGTGATTATTGCTGGCGCAGGCGGTGCGGCGCATCTGCCGGGCATGATCGCCGCGAAAACCCTGGTGCCGGTATTAGGCGTGCCGGTGCAAAGCGCGGCCCTGAGCGGCGTGGACAGCCTCTACTCTATCGTGCAAATGCCGCGCGGCATTCCGGTAGGGACGCTGGCGATCGGCAAAGCGGGTGCTGCAAACGCCGCACTGCTGGCGGCGCAGATCCTCGCCCAGCATGACGATGCGCTACTGGCGCGTCTGGCGGCA +>37 +AAACGCTTCCTGAACGAACTGACCGCCGCTGAAGGGCTGGAACGTTATCTGGGCGCCAAATTCCCGGGTGCGAAACGTTTCTCGCTCGAGGGGGGAGATGCGCTGATACCTATGCTGAAAGAGATGGTTCGCCATGCGGGTAACAGCGGCACTCGCGAAGTGGTGCTGGGGATGGCGCACCGCGGTCGTCTGAACGTGCTGATCAACGTACTGGGTAAAAAACCGCAGGATCTGTTCGACGAGTTTGCCGGTAAACATAAAGAACATCTGGGTACCGGCGACGTGAAGTATCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGTCTGGTTCACCTGGCGCTGGCGTTTAACCCATCGCATCTGGAAATTGTGAGCCCGGTGGTGATGGGCTCCGTGCGCGCCCGTCTGGACCGACTGGACGAACCGAGCAGTAATAAAGTGCTGCCGATCACTATTCACGGCGACGCCGCGGTGACCGGCCAGGGCGTGGTTCAG +>38 
+AAACGCTTCCTGAACGAACTGACCGCTGCAGAAGGGCTGGAACGTTATCTGGGGGCAAAATTCCCTGGCGCGAAACGTTTTTCGCTGGAAGGCGGCGATGCGTTAATTCCGATGCTCAAAGAGATGGTCCGCCATGCGGGCAACAGCGGCACCCGCGAAGTGGTGTTGGGAATGGCGCACCGTGGTCGCCTGAACGTACTGGTCAACGTGCTGGGTAAAAAACCTCAGGATCTGTTTGACGAGTTTGCCGGTAAACATAAAGAACATTTGGGCACCGGCGACGTGAAGTACCATATGGGTTTCTCGTCGGATATCGAAACCGAAGGCGGACTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTCAGCCCGGTAGTGATGGGGTCTGTGCGCGCACGTCTCGACCGGCTCGACGAACCGAGCAGCAACAAAGTGTTGCCAATCACCATTCATGGTGATGCAGCAGTTACCGGGCAGGGCGTGGTTCAG +>39 +AAACGCTTCTTAAGCGAACTGACCGCCGCTGAAGGCCTTGAACGTTACCTCGGCGCAAAATTCCCTGGCGCAAAACGCTTCTCGCTGGAAGGCGGTGACGCGTTAATCCCGATGCTTAAAGAGATGATCCGCCACGCTGGCAACAGCGGCACCCGCGAAGTGGTTCTCGGGATGGCGCACCGTGGTCGTCTGAACGTGCTGGTGAACGTGCTGGGTAAAAAACCGCAAGACTTGTTCGACGAGTTCGCCGGTAAACATAAAGAACACCTCGGCACGGGTGACGTGAAATACCACATGGGCTTCTCGTCTGACTTCCAGACCGATGGCGGCCTGGTGCACCTGGCGCTGGCGTTTAACCCGTCTCACCTTGAGATTGTAAGCCCGGTAGTTATCGGTTCTGTTCGTGCCCGTCTGGACAGACTTGATGAGCCGAGCAGCAACAAAGTGCTGCCAATCACCATCCACGGTGACGCCGCAGTGACCGGGCAGGGTGTGGTTCAG +>40 +AAACGCTTCCTCAGCGAACTGACTGCAGCGGAAGGTCTGGAACGCTACCTGGGCGCGAAATTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGTGATGCGTTAATCCCAATGCTCAAAGAGATGATCCGCCACGCCGGTAACAGCGGTACCCGTGAAGTGGTACTGGGTATGGCGCACCGTGGTCGTCTGAACGTCCTGGTTAACGTGCTGGGTAAAAAGCCGCAGGATCTATTCGACGAATTTGCGGGCAAACATAAAGAACACCTCGGTACCGGTGACGTGAAGTACCACATGGGCTTCTCATCGGATATCGAAACCGAAGGCGGTCTGGTGCATCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTTATCGGTTCCGTACGTGCACGCTTGGATCGTCTGGACGAGCCGAGCAGCAATAAAGTGCTGCCAATCACTATTCATGGTGATGCGGCAGTAACCGGGCAAGGCGTGGTTCAG +>41 
+CGTACTTTCCTTGAAGAGCTGACTGCCGCTGAAGGTTTAGAGCGCTATCTTGGTGCGAAATTCCCTGGTGCTAAACGTTTCTCTCTCGAAGGGGGGGATGCCTTAGTTCCGATGACCAAAGAGATGATCCGTCACGCGGGTGCCAGTGGCATGCGTGAAGTGGTGATTGGGATGGCGCACCGCGGTCGCTTGAACATGCTGGTCAACGTTCTGGGTAAAAAACCGCAAGATCTGTTTGATGAGTTTGCCGGTAAACATGGCGAAGGCTGGGGCACAGGTGATGTGAAATATCACCAAGGTTTCTCCGCTGACTTTGCGACACCGGGCGGTGATGTTCACTTAGCACTGGCTTTCAACCCATCGCATCTTGAGATTGTGAACCCTGTTGTGATGGGTTCAGTTCGCGCGCGTCAAGACCGCCTAGGTGATGAAGATGGCAGTAAAGTGCTACCTATCACTATCCATGGTGACTCTGCGATTGCCGGACAAGGTGTGGTGGCT +>42 +AAACGCTTCCTGAGCGAGCTGACCGCAGCCGAAGGCCTTGAGCGCTACCTGGGCGCGAAGTTCCCGGGCGCGAAACGCTTCTCGCTGGAAGGCGGCGACGCGCTGATCCCGATGCTGAAAGAGATGATTCGCCACGCGGGCAACAGCGGCACGCGTGAAGTGGTGCTGGGTATGGCGCACCGCGGTCGTCTTAACGTGCTGGTTAACGTGCTGGGTAAAAAACCGCAGGACCTGTTCGACGAGTTCGCGGGCAAACACAAAGAACACCTTGGCACCGGCGACGTGAAGTACCACATGGGCTTCTCGTCAGATATCGAAACTGAAGGCGGCCTGGTTCACCTGGCGCTGGCGTTTAACCCGTCGCACCTGGAAATCGTTAGCCCGGTGGTAATTGGTTCGGTACGTGCCCGTCTGGATCGGCTGGACGAGCCGAGCAGCAACAAAGTACTGCCGATCACCATTCACGGCGACGCCGCGGTGACCGGTCAGGGCGTGGTTCAG +>43 +GTGCTGGGCCGTAATGGTTCCGACTATTCCGCCGCCGTGCTGGCCGCCTGTTTACGCGCTGACTGCTGTGAAATCTGGACTGACGTCGATGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGACTGCTGAAATCGATGTCCTACCAGGAAGCGATGGAACTCTCTTACTTCGGCGCCAAAGTCCTTCACCCTCGCACCATAACGCCTATCGCCCAGTTCCAGATCCCCTGTCTGATTAAAAATACCGGTAATCCGCAGGCGCCAGGAACGCTGATCGGCGCGTCCAGCGACGATGATAATCTGCCGGTTAAAGGGATCTCTAACCTTAACAACATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGGATGATTGGGATGGCGGCGCGTGTTTTCGCCGCCATGTCTCGCGCCGGGATCTCGGTGGTGCTCATTACCCAGTCCTCCTCTGAGTACAGCATCAGCTTCTGTGTGCCGCAGAGTGACTGC +>44 
+GTGCTGGGGCGTAACGGTTCCGACTATTCCGCTGCGGTACTGGCCGCCTGTTTACGCGCCGACTGTTGCGAAATCTGGACGGACGTTGACGGTGTGTATACCTGCGACCCGCGCCAGGTGCCGGATGCCAGACTGCTGAAGTCAATGTCCTATCAGGAAGCGATGGAACTTTCCTACTTCGGCGCCAAAGTGCTTCACCCGCGTACCATTACTCCCATCGCTCAATTCCAGATCCCATGTCTGATAAAAAATACCGGTAATCCGCAAGCGCCGGGCACGCTGATTGGCGCCAACAGCGATGAAGACGGGCTACCGGTAAAAGGCATCTCGAACCTCAATAATATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGCATGGTCGGGATGGCGGCGCGCGTGTTCGCCACCATGTCGCGTGCCGGGATTTCGGTAGTGCTGATCACCCAATCCTCTTCGGAGTACAGCATCAGCTTCTGCGTGCCGCCAAAGCGATGC +>45 +GTGCTGGGCCGTAACGGCTCCGATTATTCCGCCGCCGTACTGGCCGCCTGTTTACGCGCTGACTGTTGTGAAATCTGGACTGACGTCGACGGCGTGTATACCTGCGACCCGCGTCAGGTGCCAGACGCCAGGCTGCTGAAGTCGATGTCTTATCAGGAAGCAATGGAGCTTTCTTACTTCGGCGCTAAAGTACTACATCCGCGCACTATTACTCCTATTGCCCAGTTCCAGATCCCTTGTCTGATTAAAAATACCGGCAATCCACAAGCGCCCGGTACGCTGATCGGCGCTGCCAGCGACGATGATGCTCTGCCGGTTAAAGGGATTTCTCACCTTAACAACATGGCGATGTTTAGTGTCTCCGGTCCGGGGATGAAAGGCATGGTGGGTATGGCGGCGCGCGTTTTTGCCGCTATGTCACGTGCGGGAATCTCGGTGGTGTTGATCACGCAATCTTCATCTGAATACAGCATCAGCTTCTGCGTGCCGCAGAGCGACTGC +>46 +GTGCTGGGCCGCAACGGTTCTGATTACTCCGCTGCGGTGTTGGCTGCCTGCTTACGCGCCGACTGTTGTGAGATCTGGACTGACGTTGACGGCGTGTATACCTGTGACCCGCGCCAGGTGCCGGACGCCAGGTTGCTGAAGTCGATGTCCTATCAGGAGGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTCCTTCATCCTCGCACCATCACCCCCATTGCCCAGTTCCAAATCCCATGCCTGATTAAAAACACCGGAAACCCGCAGGCCCCTGGTACGCTGATCGGCGCCAGCGTGGATGAAGACGAACTGCCGGTGAAAGGGATCTCGAACCTGAACAATATGGCGATGTTCAGCGTTTCCGGCCCAGGAATGAAAGGGATGATCGGGATGGCGGCGCGCGTCTTCGCGGCAATGTCCCGCGCGGGGATCTCCGTGGTGCTGATCACGCAATCCTCTTCTGAATACAGCATCAGTTTCTGCGTACCGCAGGGCGACTGC +>47 
+GTGTTGGGGCGCAATGGCTCTGACTACTCTGCCGCTGTGCTGGCTGCCTGTTTACGCGCGGACTGTTGTGAGATCTGGACCGATGTCGACGGCGTATATACCTGCGATCCGCGCCAGGTACCCGATGCCCGACTGCTGAAGTCGATGTCTTATCAGGAAGCGATGGAGCTTTCTTACTTCGGCGCCAAAGTTCTGCATCCGCGCACCATTACCCCAATTGCCCAGTTCCAGATCCCGTGCCTGATTAAAAATACCGGCAATCCACAAGCGCCTGGCACGTTGATCGGCGCCAGCAGTGATGAAGACGATTTGCCGGTAAAAGGTATTTCTAACCTCAATAACATGGCGATGTTTAGCGTCTCCGGCCCTGGAATGAAAGGCATGGTAGGCATGGCGGCGCGCGTTTTTGCCGCGATGTCGCGTGCGGGCATCTCGGTGGTGCTGATCACGCAGTCTTCTTCTGAATACAGCATCAGCTTCTGCGTTCCGCAGGGCGACTGC +>48 +GTATTAGGTCGCAATGGTTCAGACTACTCAGCTGCAGTATTAGCAGCCTGTTTACGTGCTAAATGCTGTGAAATTTGGACTGATGTTGACGGTGTTTATACTTGTGATCCACGTTTAGTGCCTGATGCACGTTTGTTAAAAGGCATGTCATATCAAGAGGCAATGGAACTGTCTTACTTTGGTGCCAAGGTACTTCATCCTCGTACAATTGCGCCTATTGCCCAATTCCAAATACCTTGTTTAATTAAAAATACGGGCAATCCAGATGCGCCGGGTACCTTGATTGGTGATGGTCAAAAAGATGAGAGCACACCTGTTAAAGGAATAACTAACCTTAATAATATGGCAATGATCAACGTATCTGGGCCTGGAATGAAAGGAATGGTAGGAATGGCGGCTCGCGTGTTCTCGGTAATGTCGAGAGCGGGGATTTCAGTTGTTCTAATCACACAGTCTTCTTCTGAATACAGCATTAGTTTTTGTGTGCCACAAAAAGAGCTG +>49 +GTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGCTGGCTGCCTGTTTACGCGCCGATTGTTGCGAGATTTGGACAGACGTTGACGGGGTCTATACCTGCGACCCGCGTCAGGTGCCCGATGCGAGGTTGTTGAAGTCGATGTCCTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCCCGCACCATTACCCCCATCGCCCAGTTCCAGATCCCTTGCCTGATTAAAAATACCGGAAATCCTCAAGCACCAGGTACGCTCATTGGTGCCAGCCGTGATGAAGACGAATTACCGGTCAAGGGCATTTCCAATCTGAATAACATGGCAATGTTCAGCGTTTCCGGCCCGGGGATGAAAGGAATGGTTGGCATGGCGGCGCGCGTCTTTGCAGCGATGTCACGCGCCCGTATTTCCGTGGTGCTGATTACGCAATCATCTTCCGAATACAGTATCAGTTTCTGCGTTCCACAAAGCGACTGT +>50 
+GTGCTCGGGCGCAACGGCTCCGATTATTCCGCAGCGGTACTGGCAGCGTGTTTACGCGCCGATTGTTGCGAGATCTGGACTGATGTCGATGGTGTCTATACCTGCGACCCACGTCAGGTACCGGATGCCCGATTACTTAAGTCGATGTCGTACCAGGAGGCTATGGAACTCTCCTATTTCGGCGCCAAAGTCCTCCATCCTCGAACCATCACTCCCATCGCCCAGTTCCAGATTCCCTGCCTGATAAAAAATACCGGAAACCCGCAAGCACCAGGAACGCTGATTGGCGCCAGCCGCGACGAAGATGATCTGCCGGTGAAGGGCATTTCAAATCTCAATAATATGGCGATGTTCAGCGTCTCCGGGCCGGGGATGAAGGGAATGGTCGGCATGGCTGCTCGCGTGTTTGCGGCAATGTCTCGCTCAGGAATTTCGGTAGTCCTGATTACGCAATCCTCCTCTGAGTACAGCATTAGCTTCTGTGTACCGCAGGCTGACTGT +>51 +GTGCTGGGGCGTAACGGCTCTGACTACTCCGCCGCCGTGCTGGCGGCCTGCTTACGCGCGGACTGCTGTGAGATCTGGACTGACGTCGACGGCGTTTATACCTGCGATCCGCGCCAGGTACCGGACGCCAGGCTGCTGAAGTCGATGTCGTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCGCGTACCATCTCCCCGATTGCCCAGTTCCAAATCCCTTGCCTGATTAAGAATACCGGTAACCCTCAGGCGCCGGGCACGCTGATTGGCGCCAGCGCGGATGAAGATGAACTGCCGGTGAAAGGCATTTCTAACCTCAATAACATGGCGATGTTCAGCGTCTCCGGCCCGGGGATGAAGGGCATGGTCGGCATGGCGGCACGCGTATTTGCCGCTATGTCCCGCAACGGGATCTCCGTGGTGCTGATCACGCAGTCTTCTTCCGAATACAGCATCAGCTTCTGCGTTCCGCAGGGTGATTGC +>52 +GTATTAGGCCGTAACGGTTCCGACTACTCCGCCGCCGTGCTGGCCGCGTGTTTGCGCGCCGACTGTTGTGAGATCTGGACTGACGTCGACGGCGTCTATACCTGCGACCCGCGCCAGGTGCCGGACGCCAGGCTGCTGAAGTCGATGTCGTATCAGGAAGCCATGGAACTCTCCTACTTCGGCGCTAAAGTTCTCCACCCCCGCACCATTGCCCCCATCGCCCAGTTCCAAATCCCCTGTCTGATCAAAAACACTGGTAACCCGCAAGCGCCAGGCACCCTGATCGGTGCCAGCAGCGATGAAGACGGCCTGCCGGTGAAGGGCATCAGTAACCTGAATAATATGGCGATGTTCAGCGTCTCTGGTCCGGGCATGAAAGGCATGGTGGGAATGGCGGCGCGCGTGTTCGCGGCGATGTCCCGTGCGGGCATCTCGGTGGTGCTGATCACCCAATCGTCTTCTGAATACAGCATCAGCTTCTGCGTGCCGCAGGCCGACAGC diff --git a/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.ndb b/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.ndb new file mode 100644 index 0000000000000000000000000000000000000000..dfa7d2e267e27fdcac41d817c56044823912df6b GIT binary patch literal 20480 zcmeI%u}*_f6adgGnn+wQ#(ywwF7Ez{QB0atSk$?{)y4En{D3a@50nD0P>05(Iy5;9 zy!&{M%k5#hB_bVp8+{VEZl~uF`CS|BJbm@Y&P}7mw14ZFqjezx0t5&U 
zAV7cs0RjXF5FoI>z|eR9NB=)$AEK`2tS>!{C;j~AzWslp33bORK$56a26G33lxW~VPj%q;tg;sy%comRr(uEevFOhThS)()6H3s z6>seN|G!V$0yw3B1Wqd;f-?%p;H&~dIH!OV&MP2> z3kt~Lq5^`rq<|zYBhZn=6$K=5RRKv{Q$P~e6_CUY1tf7(0ZD8qAckiNjy+M5)T!S#HIq0c%*5F^v_TA><~h4ZV56+-Sg| z89H=uaB#4q(3T21Sm@y3WIH%GIAm~8=+L2pf`tkl90b38f%I?ag%8jBbI!SsB(IuZ z(k^B4*<8)c)zXEsoinX!-p;17R;plEoK&gmn3kQcIA*b$v2!Jdy+^vyNtV*w7Hlp@ zi&=}A_Uq*b#z3skh{xiChB0_8K5X0>-48s}*gLZSxpogb3EDc=Bo~bEs1W;3YPs zKqpdIz@~Jn3mGh9OFG?+`*?$&(wQrm)i!IBi>ABD#8L2A2- q_fmKOtI}B$@1%3Rcq^Un!D}gU8H>E+CA^Y4E@GZ~j$ut5^Zx;CzkPH7 literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.njs b/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.njs new file mode 100644 index 0000000..759cb8c --- /dev/null +++ b/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.njs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "nucleotide", + "dbtype": "Nucleotide", + "db-version": 5, + "description": "locidex/example/build_db_mlst_out/blast/nucleotide/nucleotide.fasta", + "number-of-letters": 25041, + "number-of-sequences": 53, + "last-updated": "2024-04-30T16:29:00", + "number-of-volumes": 1, + "bytes-total": 48248, + "bytes-to-cache": 7089, + "files": [ + "nucleotide.ndb", + "nucleotide.nhr", + "nucleotide.nin", + "nucleotide.not", + "nucleotide.nsq", + "nucleotide.ntf", + "nucleotide.nto" + ] +} diff --git a/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.not b/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.not new file mode 100644 index 0000000000000000000000000000000000000000..7c2dda10a1a547853d1c27aa5c54938a295d717a GIT binary patch literal 644 zcmdtTMFN5V001x>TNJwk#BTooH4p9;?y@ZRlXt;Imt1zmfvc{$?uMIgx$TZacinU3 wz6Tz9Z*opT}eelsIpMCMwH{bp6(=WgMapr&Q3tcn?%>V!Z literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.nsq 
b/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.nsq new file mode 100644 index 0000000000000000000000000000000000000000..7fc5e3b8e6fafd8419a1bdda10966df4ce2396be GIT binary patch literal 6297 zcmZ9Mk3W-p|Hn6KrJ`R5o!jXgnzNjv&Ns@A(o}>_DwShXLu_kiQ(xyW9GXe`!NxHA zk)0e|rLs<^qAApUQm1oQ;>2k>gtg6b7dlbPee``_*Vg%-$Ndjn&pn^7_h&fU?yf3w z)y>@g_lZg&z0+@j-e8eXQ193s( zCt37>1RIlK%0=ZTQ{iTk#0hMf?Y1WEO+e(QDM92EsP-xy!p*!m9%CkAXH}HyS zDnQM^k(N%2$!BfaKI=wncT_Do{_gI*p>sGHGN5x(vKc%)LRK0R+F#`;{hW>R`#{;VsepQ=fwXjqR%F{|vn;5!`(c&qz}?-0p>s?bvI9KDzLYIY zjBWrrxM%LeO(^@}j`)3MXmc}(7TY2#76IH1`;o{kwn#>E-ZN}=i{q!fggrAM#JEh<0j{B0I+gw}{cx7nn>c1CW_g;XD z7TIpXEyII*Eji)l?l{;zL-q$#k;KYoFkyEkBB>J><5s3c3#Lj3*D5lgyn11GCX}z2 zG<;cg_rwF8hVG7_ba@}p#h5$LiF!sIxC4EN zQHaskxbkKxPXZl26=bl`gWYL2zVN7*D{meAS{kvGRCa(zV+vlsxu;jqY0H>L%qg3P zNJZ5uO|vOknyr0)DLVl^kLcd6dLpT>qqVSrZth^ZcYgrBIRF1%O?UIE04OnSn1nI zhcJ2wxc`klU3ePkJd|F6fIGdXGe&=BD+hH~Tg$TxE#+<1B}zl)D{08|WwQ8`NG1i{ zy{V5rizzVk!iKz$!tSX1J1CzKhQRJ=7WXl5KkQHJ!*;(9^yU*|*zVJXSC(Xy03CL( z0s6ML51#1lfd04L*@yhF-EE8D>iAzvLtN`*+t1TNuD^Z*?kd_WmO$$RyXThz9d+;O z4KY&<1zovVdAPd2=pmbXBT%}f{3D=$oMCZqL|U68qTeQT=Vmwu-KPv%iw!@9}5YY!4NxOSIe$xI?yRovV3!jW_SQsllP#(6je>&y{Z-=uku zb{ezgpnk(f)fP{WWQX$jw%hI6tyLw#C8iT!#@JOY5vMyzicZ`A4MyRy3dfo@APs~2 z-&Q*fAWY5RrGqFeL2keGJ*-QncHyL4i{uDKBsf_LYSwCw?eJtY#OgzK{v+(jaGZOE zk|A%`it9^4I(zfGD*^Wivt`r9MDiua z66B`&TI9yzoXKFKOk}oEutE;G^j(FH(=0UiAkXhJ-77THU2ki*)LA>;sFt;}{Gj9C z)o^=R?PF(x5Trlr7m8EI5~jTWQmnP(Ru_|ECPG$~^eT8R2_u&x*#cuRPO!;T=+c*N z{;+r2)2mw<5s589o@e&EmvxMnwrksKFSJ!^=^LTr3Ng!X-g}sH-p$$QXq(#2iW`Wu z|4mP)>wqs$6Mqfsn5Yp#N5xDpvP#mcNS+sy@&j}P825=pC#1*Au8&Am1$pYP zsVYKtQQJ|o5V`5wFR4dQJ{n1Jy7nFh7cxDNX2WD{H7kPD(zuLgaju3qru?HTDKCzn z=wxXGoBYAd6U@pL?Q>Y_OXz6xaDhqWQ1A0umD24ySR&g!_u0i+O}ud1gM=QooFtZ} z*G@?GcDD(dR?M9$XA@%o?QFN6>m3%>7>}^;73!DPar-sHMbZWA;^68D?dyEwH{tw} z+I{2UIy#?Gty-v0^hS17O$2~%UfF+F_W z{Mvo5!+)jocU4O-8bTK*9*c=%EGHgsQ%dj%m$Zf$8N+{k*yh8fC_Ow2qjzmjWr%G1 z?q^8S70KZNt%SFAC3fvnR?U>8zgsWB=p2XG7tVGRuJ`e<#wvumrBLr#kGhM&y^K*k 
zVNNf&ofaNFzczY&J?1XGSiAJBQMh>t!|SPB@2bKx*`#Zjdob%O!v-ZjFbSn+RXS4> z^dgVgTWoND5x%$;+)Kcng-l&og3;|KYP|`u;Ev;Z9|gLaT~w$~tmU2rx(dnz-Sk?F zmIwFraNU3S43=so!c{I6X2mhQYI5~56`sK$xf3F({Ko}Y`P@L-WT>|~z4Ev12@H`3 z{T5q@mQP5vdB%a=`@TXZazA}I{?CSa_EY6`4!e2IBq3J53So<({9o=TV0Tk`z7gC@ zYWZmx-9x&t2Cu9;95cxEI+d?iDA4jJx`94f%UnIMxin}M&EhUa-EH^Wq86t&@oZ+d z5}uhX?wKXu|2~duSYba^TSwRp<=12M0fcQ|sAp7h<1P;0L$8ieJ*E97-}oA(hXNh0 zuIhxB{+O~>7}L-2VjO)=mCk2!NtNA^F{6)(Hmi}%%G_1f@>cqNwsKN&F3dYM{29BF zv_%2sOZGaqQJUsK`8o%;tnL7Q+t>* z$eqDJZyxW(AgL5K;EpiSo6jY>pK^SIVbi$jg6zfUe8GdXKHrun8VHFhTHex}@r z$Zh+|b|w%um4;Pl3yAIoR)%e#2pB*6v{HA(?a~2bBATF>nOp74d(V(!j6e3&AJksAqmfo%@vF(+8onYfNKddqD6vk+k zM%z!Uq!Y&HqYUbNVA!D^zz|wIROlX-R%c+}X{DdL#Q z+t8{{wlTqGkvymodG7%XTh(FM@{qpk?~k5Dv+MinE46{!y% z&m&X3{jxq;bO%~3Q*ASR*G{aQ{U7Dc*Ywo?h_)7#TK9;ypMpKcOG4mwsq>R}RbiWB zd$_25%^Zayu>|!%H-}d0aUMm^995{{u700{R-Cn294B-SZv{nPZmgAi&}E4_n{_2& zjmU9~0q?_drasm)U1Uvv8O~I1c?vx2Of4q_S#+*ok2IbSHmQ`OJGHA%1`b9w>FT-- z4DL{Lri-)Z57#yNq7PoN4^!37Wpz!jaxHXKC`PZtjb}3PtrMBE*rJrV(MHOVVQNHD z5V3?R9Bw&jD&aNI3AgJBMxb{VjSN`mTb~63y=~>$*WBt=_&B7%LXXleI=QMd>POv~ zK0_k+5iJeq#gwahd&gJ9qOj%8H5U4}MAV&>Ll{ew13eM{U0UWW&fygMXyf%GI%-I! 
zh29J9Jk(u{(TfYfU5mO0_k^ey3))ckf$@(LkP0>C9@V>OFz}yIKmNh#o6vKILUa73 zK-U2MA?GmF$GPS#=DvqGqd>CHJCkrAQ!W@!Vp`~;6#H%94s_ZL;)NLDu=_a+{kQWd zeW|3VJwDG|htb>A;12XHn0upo3tAp?e;L*E2dDeiQ`XSqOc$U-dHWB*o&BkE4GHMU z2V;d^1ya8EOJZt{Mlhaf#J8rW%{n8BT^>CE^q7z&y-%r9I4s6?&zWYdC%lI8#YH1J zwEP!wr_O9t2MF#tdcUR)OvO7RS4KA7asTcC>^>OS|5c0bZ07`%JCpjqSNcVg>zcNE zA3Pi`$Ph~pc{iDE!0tq^d`UDZhY0S%_wMY&tfFYRI-NEI-5uyEaA!6=As8|D>5_c3 zd>ww?lHj-4?tfif#ou-Z_n(G)ZbA7T%$;&UU*u@Hx=ZNpnrAT%&GGG2hQ`Dgxy33_ z^{=iPX!wG6mp(u(Q`1~SGx}Er?2Z&BoJ{{V8`<_*wv*QJM#ZX~&7oac{83xyAG*UC zpSbGS%E)-piay%tveo-zr!7PRUUIw3Py5dj1y&>StA9#&G%{&LJ;Rl2c3%FCf5Ede z`-NWO;~u1WrX-+r!v&PiC}@dWT~*cM_Tnx*9;K6kekovgC(sY>2D(qSlhSdlYt`3l zL%Ra_*HQXJ-RbvaA4)gq z%j`rzk0DehfICKK6|`XPZr~nIy`+wG4bSLr2>7;BxZ(SQ;J*LU1Ag)k#(oP#-6v7^ z;)m-Q9B@~GI~jFf_qE0SdH3Zr{<9*QE&~0XH1!fM$-ZZ}#W!*Ful&z3x`c?jQwhzw z+f>6ZU*}9&SMD}9m@M@9Xoabozg|-kK&yLQQ^CWfnKJp7P`38 z`j@SaqfZ0(&Xn%*PoVrN)ID3V;W9I+sC!uDo9OohrT=MYlOXOvSD(opDhF<7urwz9 z$So#DPiSyUzDrM_mYs_tM_b*k^yL|Ta>ucqn7ih^yJ<1dB|4$Y6z#hAy8V0RSa%-+ z_a93!_k;#s(v}{S-iXn&4dW>N;xAf<=8tZNJ-v8m>Budkt*U=rm9E8Y4rs@aH5Yl(nmj8 zu|Ias${PO%$$hmzcla?6sZ=)i{M7JqlDKp7ci)W|{pGtC`gt~9xth1GaCNsc446AZ z+_HCF)kiIE^_JbE$j2tp@^JItqgS_odA1YPacmcM^Jdgt@Z0)eZ`WUk%cS7GhW4Ot z-L}3t3v00Qu)9MG>V8m?aETe!4&~P*`aX&N{8K1DK9A^rL(?P2u8w%YM1-3M`sylO z!xztizWvfU8aaAwv>_ll@>uH0^xc2Li~Gj!kz+>}N*~WaZ(b0!J{Vq{sZ1I#^zWnf z*R7-UqBq~k?XErJzs#Yb^i-v?Rk5Lw6+XA;U-UJJe&~yPnf;pcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.nto b/locidex/example/manifest_selection/dbs/one_db/blast/nucleotide/nucleotide.nto new file mode 100644 index 0000000000000000000000000000000000000000..ad19396e81aff427697a109c3c035ac73cb27f3f GIT binary patch literal 
216 zcmXBFg${xM06;;oyHK$M6YTo`U-LHJc6+}dFSG(hN|dQkrAD0wO0 +KFRPGHADYTYHQKYGVRDYRGGGRSSARETAMRVAAGAIAKKYLQQEFGIEVRAYLSQMGDVAIDKVDWNEIENNDFFCPDVDKVAAFDELIRELKKEGDSIGAKIQVVATGVPVGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVRQKGSQHRDPLTPQG +>1 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKHGIVIQGCLTQMGDIPLEIKDWQQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVANGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>2 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGVEIRGCLTQMGDIPLEIKDWSQVELNPFFCPDPDKIEVLDELMRGLKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKEG +>3 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLKEKLGIEVRGYLSQLGPITCDLVDWSIVESNPFFCPDPSRLDALDEYMRALKKEGNSIGAKVTVVAQGVPAGFGEPVFDRLDADLAHALMSINAVKGIEIGDGFGVVTLKGTENRDEITKKG +>4 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAEKFGIEIRGCLTQMGDIPLEIKDWSQVEQNPFFCPDPDKIDALDELMRALKKEGDSIGAKVTVVASGVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVALRGSQNRDEITKDG +>5 +VFRPGHADYTYEQKYGFRDYRGGGRSSARETAMRVAAGAIAKKYLQQKFGIVIRGCLSQMGDIPLAIKDWDQVELNPFFCADADKLDALDELMRGLKKEGDSIGAKVTVVADGVPAGWGEPVFDRLDADIAHALMSINAVKGVEIGDGFDVVKLRGSQNRDEITKAG +>6 +VFRPGHADYTYEQKYGLRDYRGGGRSSARETAMRVAAGAIAKKYLAQKFGVVIRGCLTQMGDIPLEIKDWDQVEQNPFFCPDPDKIEALDELMRALKKEGDSIGAKVTVVADSVPAGLGEPVFDRLDADIAHALMSINAVKGVEIGDGFGVVQLRGSQNRDEITTAG +>7 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>8 +MEMVARVTLSQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGSELRTVATDGHRLAVCSMPLEASLPSHSVIVPRKGVIELMRMLDGGENPLRVQ +>9 +MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>10 +MEMVARVALIQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPVGQPLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>11 
+MEMVARVALVQPHEPGATTVPARKFFDICRGLPEGAEIAVQLEGERMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGQSLPSHSVIVPRKGVIELMRMLDGGDNPLRVQ +>12 +MEMIARVTLTQPHDAGATTVPARKFFDICRGLPEGAEIAVQLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCSMPIGDSLPNHSVIVPRKGVIELMRMLDGGETPLRVQ +>13 +MEMIARVALSLPHQAGATTVPARKFFDICRGLPEGAEIAVTLEGDRMLVRSGRSRFSLSTLPAADFPNLDDWQSEVEFTLPQATMKRLIEATQFSMAHQDVRYYLNGMLFETEGEELRTVATDGHRLAVCAMPVGQPLPNHSVIVPRKGVLELMRMLDGGDSPLRIQ +>14 +SALTENDLVFALSQHAVTFADAELQQQGKSWPSLPRYFAIGRTTALALHTVSGFNIHYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELIGETLTARGADVDFCECYQRSAKYYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>15 +AALGESDLLFALSQHAVAFAQSQLHQQDRKWPRLPTYFAIGRTTALALHTVSGQKILYPQDREISEVLLQLPELQNIAGKRALILRGNGGRELIGDTLTARGAEVTFCECYQRCAIHYDGAEEAMRWQSREVTTVVVTSGEMLQ +>16 +ATLTENDLVFALSQHAVAFAHAQLQRDGRNWPASPRYFAIGRTTALALHTVSGFDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRGRELLGETLTARGAEVSFCECYQRSAKHYDGAEEAMRWHTRGVTTLVVTSGEMLQ +>17 +AALTDNDLVFALSQHAVAFAHAQLQQQELDWPVQPRYFAIGRTTALALHTVNGCDIRYPLDREISEVLLQLPELQNIAGKRALILRGNGGRELLGKTLTERGAEVTFCECYQRSAKHYDGAEEAMRWHSRGVTTIVVTSGEMLQ +>18 +ETLGDNDLLFALSQHAVSFAHAQLQQQGLNWPSLPHYFAIGRTTALALHTVSGHKIRYPQDREISEVLLQLPELQSIAGKRALILRGNGGRELIGQTLTSRGADVTFCECYQRSAKHYDGAEEAMRWQSRGVTTVVVTSGEMLQ +>19 +RLLQEGDLLFALSQHAVEFAHAQLQQHAVSWPHAPRYFAIGRTTALALHTASGIDVRYPLDREISEVLLQLPELQTIAGKRALILRGNGGRELLGETLRERGADVTFVECYQRCAKHYDGAEEAMRWHARGINTLVVTSGEMLQ +>20 +IAGCQKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADIARKVAEAVERQLAELPRAGTARQALSASRLIVTKDLAQCV +>21 +IAGCKKVVLCSPPPIADEILYAAQLCGVQEIFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGIAQNVAEAVERQLAELPRAETARQALSASRLIVTKDLAQCV +>22 +IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGSESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAGMASRVAEAVERQLAALPRAETARQALSASRLIVTRSLAQCV +>23 
+IAGCKKVVLCSPPPIADEILYAAQLCGVQDVFNVGGAQAIAALAFGTESVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPAADMARRVAEAVERQLAELPRAETARQALNASRLIVTKDLAQCV +>24 +IAGCQKVVLCSPPPIADEILYAAKLCGVQAIYKVGGAQAISALAFGTVSIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDADMAKRVGDAVERQLADLPRAETARQALSASRLIVARDLDQCI +>25 +IAGCKKVVLCSPPPIADEILYAAQLCGVKEVFNVGGAQAIAALALGTESIPKVDKIFGPGNAYVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATPDFVASDLLSQAEHGPDSQVILLTPDAKLAEGVAEAVERQLAELSRADTARQALSASRLIVAKDLAQCV +>26 +IAGCKKVVLCSPPPIADEILYAARLCGVQQVYQVGGAQAIAALAFGTETVPKVDKIFGPGNAFVTEAKRQVSQRLDGAAIDMPAGPSEVLVIADSGATTDFVASDLLSQAEHGPDSQVILLTPDSAMAQAVADAVERQLAELPRAETARQALAESRLIVARDLAQCV +>27 +SDWATMQFAAEIFDILDIPHHVEVVSAHRTPDKLFSFAENAEENGFQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLRD +>28 +SDWTTMQFAAEIFEILDVPHHVEVVSAHRTPDKLFSFAETAEENGYHVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDNALHQRLSN +>29 +SDWATMQFAAEILDILNVPHHVEVVSAHRTPDKLFSFAEDAESNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>30 +SDWATMQFAVEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKELHQRLNG +>31 +SDWATMQFAAEIFDILNVPHHVEVVSAHRTPDKLFSFAESAEEKGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLRQRLAD +>32 +SDWATMQFAAEIFEMLDVPHHVEVVSAHRTPDKLFSFAESAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKALHQRLSD +>33 +SDWATMSHAADVLDTLQIPYHVEIVSAHRTPDKLFSFAEKAKSNGFDVIIAGAGGAAHLPGMLAAKTLVPVFGVPVQSATLSGVDSLYSIVQMPKGIPVGTLAIGKAGAANAALLAAQVLALHSPAILDALTA +>34 +SDWATMQFAAEIFEILNVPHHVEVVSAHRTPDKLFSFAESAEENGYEVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILATHDKDLHQRLAE +>35 +SDWATMQFAAETAEENGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDAELHQRIAD +>36 
+SDWATMQHAAEILDALDVPYHVEVVSAHRTPDKLFSFAESAQHNGYQVIIAGAGGAAHLPGMIAAKTLVPVLGVPVQSAALSGVDSLYSIVQMPRGIPVGTLAIGKAGAANAALLAAQILAQHDDALLARLAA +>37 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLINVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>38 +KRFLNELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMVRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVMGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>39 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDFQTDGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>40 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>41 +RTFLEELTAAEGLERYLGAKFPGAKRFSLEGGDALVPMTKEMIRHAGASGMREVVIGMAHRGRLNMLVNVLGKKPQDLFDEFAGKHGEGWGTGDVKYHQGFSADFATPGGDVHLALAFNPSHLEIVNPVVMGSVRARQDRLGDEDGSKVLPITIHGDSAIAGQGVVA +>42 +KRFLSELTAAEGLERYLGAKFPGAKRFSLEGGDALIPMLKEMIRHAGNSGTREVVLGMAHRGRLNVLVNVLGKKPQDLFDEFAGKHKEHLGTGDVKYHMGFSSDIETEGGLVHLALAFNPSHLEIVSPVVIGSVRARLDRLDEPSSNKVLPITIHGDAAVTGQGVVQ +>43 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDDDNLPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>44 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGANSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFATMSRAGISVVLITQSSSEYSISFCVPPKRC +>45 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGAASDDDALPVKGISHLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQSDC +>46 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASVDEDELPVKGISNLNNMAMFSVSGPGMKGMIGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>47 
+VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQGDC +>48 +VLGRNGSDYSAAVLAACLRAKCCEIWTDVDGVYTCDPRLVPDARLLKGMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPDAPGTLIGDGQKDESTPVKGITNLNNMAMINVSGPGMKGMVGMAARVFSVMSRAGISVVLITQSSSEYSISFCVPQKEL +>49 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDC +>50 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDDLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRSGISVVLITQSSSEYSISFCVPQADC +>51 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTISPIAQFQIPCLIKNTGNPQAPGTLIGASADEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRNGISVVLITQSSSEYSISFCVPQGDC +>52 +VLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTIAPIAQFQIPCLIKNTGNPQAPGTLIGASSDEDGLPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRAGISVVLITQSSSEYSISFCVPQADS diff --git a/locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pdb b/locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pdb new file mode 100644 index 0000000000000000000000000000000000000000..707749d104c55e8225559067c426a6ab6375f9c0 GIT binary patch literal 20480 zcmeI%K}y3w6adg~6q;Q`yo0!N3bORK$56a26G33lxW~VPj%q;tg;sy%comRr(uEevFOhThS)()6H3s z6>seN|G!V$0yw3B1Wqd;f-?%p;H&~dIH!OV&MP2> z3kt~Lq5^`rq<|zYBhZn=6$K=5RRKv{Q$P~e6_CUY1tf7(0ZD8qAckiNjy+M5)T!S#HIq0c%*E>oX*&U?;ze(&>kB;`u7AoKH^$w)jJTk^z~LWz_a^Mn`T zW;7TL2NULeI+$EYd%|XDKJ7`(CDXC^?0;>W;%10|g1-lCsc4vGb#H3U=xlE_yzSmD z!|3Yq`izmG%uZ7Gp7B#Nf5(4U0qQc&e@C(esH`olvxESaaUo@MBfAf)xR!FP=*Jpv zIF=oQ*ub4+Ey6H1@gU`uVhr1Olx&rlzz&|Id>1CMj~B`AMi2*h<)n>>;0SM0VH2it zf)B~jf*JiDXk%;5vswsrRGz$JSzqy3#d7Rj}N9tS-R zdWz_A(&MD3n4S`PO3tKGKdz**0o+UF<9L=TLb#PGNAV?99rB5Ir}|T>A=gE&i`-gr W>&UGmx1L-#x$Y;aVMQ{U@$e0o_*|C& literal 0 HcmV?d00001 diff --git 
a/locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pjs b/locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pjs new file mode 100644 index 0000000..f744503 --- /dev/null +++ b/locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pjs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "protein", + "dbtype": "Protein", + "db-version": 5, + "description": "locidex/example/build_db_mlst_out/blast/protein/protein.fasta", + "number-of-letters": 8347, + "number-of-sequences": 53, + "last-updated": "2024-04-30T16:29:00", + "number-of-volumes": 1, + "bytes-total": 50128, + "bytes-to-cache": 8969, + "files": [ + "protein.pdb", + "protein.phr", + "protein.pin", + "protein.pot", + "protein.psq", + "protein.ptf", + "protein.pto" + ] +} diff --git a/locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pot b/locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pot new file mode 100644 index 0000000000000000000000000000000000000000..7c2dda10a1a547853d1c27aa5c54938a295d717a GIT binary patch literal 644 zcmdtTMFN5V001x>TNJwk#BTooH4p9;?y@ZRlXt;Imt1zmfvc{$?uMIgx$TZacinU3 wz6Tz9Z*opT}eelsIpMCMwH{bp6(=WgMapr&Q3tcn?%>V!Z literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.psq b/locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.psq new file mode 100644 index 0000000000000000000000000000000000000000..d48794c953e7b23e4dfd363f7876f1dcbf238cdb GIT binary patch literal 8401 zcmdU!+j8SL3`BzxNsxqi0pZgBf9kemJC<)5&y16)t=fIbIPr0#8v@{u9D3zQZW&8# zd132r>CD_!srx~5HzKA)IWN+h51hPpT14B-&M3KT?ofR4rMM8L>I^?23eHRFUUK&A zG%@AW%xJJxV_{z{xGs)dqDV4pDrA|>Cw3g1S#O!O#qO$%+W5mRoPOn$uQ&|9F zG7o>!GyjR6_*)P4e5vk#rl-5-3}5-PwTv9u#<}GP{-o8P?z8TMXyl`w@aJ9K-KOVZ zr{|L#<-I@Yp7ZM*t(l+0RBDcFvQ<4Vte z&RL(--Cft6#D1-(e~UjK=$SS>ANq60nYJ7aa5_B6zdc6}Io%6&UpYze!q}d3j<@Cg zmXl^oCJlAp`BOgg?iBt^?L$fMc*4Uk0JTN35zwkVMx(}A4;A=*36!{xK|&ITDTk8c 
zQpI(j(2hhJ+LyGFh%-iygw|IuabaL$KV(p6!W0!IKP{~*^E2fnagiI?}B|q@30#=$);{9ne;?zACf=*=FnMS|NBom9Z-s{>ap_ znM_)1O@#Sc7J`i?w&qhP#!L-+2|^a|?8cmdMI@)dl?w$Mo^o%d4MQmt)|46Zd|lIL z_+yGbSY0q$gQJan=|EdBT{RY-A_YCi_@9^S|Qc zM6KL#VrTnpPG{e%C(lsr!bz*1H=JEx(DROSytD2Y8fWS|&aLhz&hnt{5B!-oe^OfV z$)9*$z@K;sq`g0vzx3w}a~r#pKi$2a)Sb9Cr7!)N{dL_1o7Y=8GqS^%x_i-1?#iF^ ziyZY$&t^-z%KPxx27WW|<7wWNX0#H+b9|TgQ{BmLd0+M1@~-ZHMWO}5_JLJIa9W7b zyqQw$=p9)CdbuxnUKbANC}Sm(Djngh^}CqKtJp50~F(v0-e#>LyECcEb%)iw5$}LT0jUXk0~%7X<=0G zZCW%;vVPS9t^7iPLrQ=aPbl=L#TzM5k-Sl0hrCdneDMWEeybKMg_90n?6p`aFdjbf z#rJ4&bf=yxq7e-hcdH>SRd~>+n~Y@2@CA;2*S>(?j~T(~Vao_^0Q-#OCmFGI+SW=) z17>u(uE;Na;cqDZE~%)F3pO-#z z4^lkzN9Pyc7s2H>;CzFhD^@o=vn177GR`|a5*Ovs(LDttLhRg;UP<_$bH=1AxFQEE zNiq9~hZFA2d5JRzRhQP24U0uookGEp1{Yq_YBsatK#j2v|M{v~vI!2BDdN4ct}#xL zxZAve(|(E*m#G)@OwaUui8C}Dz8>nCUe@y`IG@-3HO@YQVDtYF85ao7R9W4-uOvCnM&Jm%>CpeLf*3#(f& z%b)|(BpC6g$*04>4TEjLHw6`o7`7_xm|16C8ZH?a1($%XU=nK|X<)-^0ndp^QZ9oF z&b1YD5WY7;)}1R-@$FF3BBjhU2gOkZ<5b2(g~H%&gn4YnaEX_om!#2D)nlf@fxv20 zH+_6NXVJBoPdV+(Ii7OX+<$-*E3ZQnKI$3o^!()f*Ltc!Pi$a+QFs1{y2GE|VFP=@ z`HG&;IP+bOusR%b#E)}CcR6yW964^;c^w<)IOPaW+H;Q1oN7<~*)Z*Xe~uJirG8QO zGv_hy{omz%<9xC1IF&u>OK6mD(o^s1e(9e$oxQ4OdCd9DpTFPd@BMkcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pto b/locidex/example/manifest_selection/dbs/one_db/blast/protein/protein.pto new file mode 100644 index 0000000000000000000000000000000000000000..ad19396e81aff427697a109c3c035ac73cb27f3f GIT binary patch literal 216 zcmXBFg${xM06;;oyHK$M6YTo`U-LHJc6+}dFSG(hN|dQkrAD0wO dict: """ input_file Path: Manifest file to be parsed """ + print(input_file.exists()) + print(input_file) if not input_file.is_dir(): raise 
AssertionError("Allele database directory must be passed directly.") diff --git a/locidex/search.py b/locidex/search.py index 81f9411..d28abae 100644 --- a/locidex/search.py +++ b/locidex/search.py @@ -304,9 +304,9 @@ def run(cmd_args=None): for opt in OPTION_GROUPS: if analysis_parameters[opt] is not None: - for option in analysis_parameters: + for option in OPTION_GROUPS[opt]: if analysis_parameters[option] is None: - parser.error("Missing required parameter: {}".format(option)) + raise AttributeError("Missing required parameter: {}".format(option)) if cmd_args.db_group is not None: analysis_parameters.db = manifest.get_manifest_db(input_file=Path(cmd_args.db_group), name=cmd_args.db_name, version=cmd_args.db_version) diff --git a/tests/test_workflows.yml b/tests/test_workflows.yml index 6748d40..46a2621 100644 --- a/tests/test_workflows.yml +++ b/tests/test_workflows.yml @@ -7,5 +7,8 @@ - name: Run extract help command: locidex extract --help +- name: Run search DB selection + command: locidex search --annotate --query locidex/example/search/NC_003198.1.fasta --db_group locidex/example/manifest_selection -o here --db_name "Locidex Database" --db_version 1.0.0 + - name: Run search - command: locidex search --annotate --query locidex/example/search/NC_003198.1.fasta -d locidex/example/build_db_mlst_out -o here \ No newline at end of file + command: locidex search --annotate --query locidex/example/search/NC_003198.1.fasta -d locidex/example/build_db_mlst_out -o here From 376a0e026036dfbb1cfee4dd7e36d676c3c06864 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 30 Apr 2024 17:17:01 -0500 Subject: [PATCH 43/51] updated multi db selection for data using a manifest --- locidex/extract.py | 2 +- locidex/manifest.py | 2 -- locidex/search.py | 3 ++- tests/test_workflows.yml | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/locidex/extract.py b/locidex/extract.py index 19a5b90..1cd8a0f 100644 --- a/locidex/extract.py +++ b/locidex/extract.py @@ 
-270,7 +270,7 @@ def run(cmd_args=None): raise AttributeError("Missing required parameter: {}".format(option)) if cmd_args.db_group is not None: - analysis_parameters.db = manifest.get_manifest_db(input_file=Path(cmd_args.db_group), name=cmd_args.db_name, version=cmd_args.db_version) + analysis_parameters["db"] = str(manifest.get_manifest_db(input_file=Path(cmd_args.db_group), name=cmd_args.db_name, version=cmd_args.db_version)) diff --git a/locidex/manifest.py b/locidex/manifest.py index 43f83e6..7fe010c 100644 --- a/locidex/manifest.py +++ b/locidex/manifest.py @@ -180,8 +180,6 @@ def read_manifest(input_file: pathlib.Path) -> dict: """ input_file Path: Manifest file to be parsed """ - print(input_file.exists()) - print(input_file) if not input_file.is_dir(): raise AssertionError("Allele database directory must be passed directly.") diff --git a/locidex/search.py b/locidex/search.py index d28abae..804224b 100644 --- a/locidex/search.py +++ b/locidex/search.py @@ -291,6 +291,7 @@ def run_search(config): fh.write(json.dumps(store_obj.record, indent=4)) run_data['analysis_end_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S") + print(run_data) with open(os.path.join(outdir,"run.json"),'w' ) as fh: fh.write(json.dumps(run_data, indent=4)) @@ -309,7 +310,7 @@ def run(cmd_args=None): raise AttributeError("Missing required parameter: {}".format(option)) if cmd_args.db_group is not None: - analysis_parameters.db = manifest.get_manifest_db(input_file=Path(cmd_args.db_group), name=cmd_args.db_name, version=cmd_args.db_version) + analysis_parameters["db"] = str(manifest.get_manifest_db(input_file=Path(cmd_args.db_group), name=cmd_args.db_name, version=cmd_args.db_version)) config_file = cmd_args.config diff --git a/tests/test_workflows.yml b/tests/test_workflows.yml index 46a2621..54b7977 100644 --- a/tests/test_workflows.yml +++ b/tests/test_workflows.yml @@ -8,7 +8,7 @@ command: locidex extract --help - name: Run search DB selection - command: locidex search --annotate 
--query locidex/example/search/NC_003198.1.fasta --db_group locidex/example/manifest_selection -o here --db_name "Locidex Database" --db_version 1.0.0 + command: locidex search --annotate --query locidex/example/search/NC_003198.1.fasta --db_group locidex/example/manifest_selection/dbs -o here --db_name "Locidex Database" --db_version 1.0.0 - name: Run search command: locidex search --annotate --query locidex/example/search/NC_003198.1.fasta -d locidex/example/build_db_mlst_out -o here From 3b416ae05514ddb74f915be53687019124130521 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 2 May 2024 13:20:33 -0500 Subject: [PATCH 44/51] added complete workflow tests and updated CI Scripts --- .../workflows/locidex-ci-pytest-workflow.yaml | 3 ++- locidex/extract.py | 4 ++-- locidex/search.py | 2 +- tests/test_workflows.yml | 22 +++++++++++++++++++ 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/.github/workflows/locidex-ci-pytest-workflow.yaml b/.github/workflows/locidex-ci-pytest-workflow.yaml index 911c917..77b0a0d 100644 --- a/.github/workflows/locidex-ci-pytest-workflow.yaml +++ b/.github/workflows/locidex-ci-pytest-workflow.yaml @@ -29,6 +29,7 @@ jobs: python -m pip install --upgrade pip pip install flake8 pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install -e setup.py - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names @@ -37,4 +38,4 @@ jobs: flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | - pytest -o log_cli=true --basetemp=tmp-pytest + pytest -o log_cli=true --basetemp=tmp-pytest --git-aware diff --git a/locidex/extract.py b/locidex/extract.py index 1cd8a0f..71fa894 100644 --- a/locidex/extract.py +++ b/locidex/extract.py @@ -25,7 +25,7 @@ def add_args(parser=None): parser.add_argument('-i','--in_fasta', type=str, required=True,help='Query assembly sequence file (fasta)') parser.add_argument('-o', '--outdir', type=str, required=True, help='Output directory to put results') parser.add_argument('-n', '--name', type=str, required=False, help='Sample name to include default=filename') - group = parser.add_mutually_exclusive_group() + group = parser.add_mutually_exclusive_group(required=True) group.add_argument('-d', '--db', type=str, required=False, help='Locidex database directory') group.add_argument("--db_group", type=str, required=False, help="A directory of databases containing a manifest file. 
Requires the db_name option to be set to select the correct db") parser.add_argument('--db_name', type=str, required=False, help='Name of database to perform search, used when a manifest is specified as a db') @@ -259,7 +259,7 @@ def run_extract(config): def run(cmd_args=None): if cmd_args is None: parser = add_args() - cmd_args = parser.parser_args() + cmd_args = parser.parse_args() analysis_parameters = vars(cmd_args) diff --git a/locidex/search.py b/locidex/search.py index 804224b..06853c0 100644 --- a/locidex/search.py +++ b/locidex/search.py @@ -22,7 +22,7 @@ def add_args(parser=None): description="Locidex: Advanced searching and filtering of sequence databases using query sequences",) parser.add_argument('-q','--query', type=str, required=True,help='Query sequence file') parser.add_argument('-o', '--outdir', type=str, required=True, help='Output directory to put results') - group = parser.add_mutually_exclusive_group() + group = parser.add_mutually_exclusive_group(required=True) group.add_argument('-d', '--db', type=str, required=False, help='Locidex database directory') group.add_argument("--db_group", type=str, required=False, help="A directory of databases containing a manifest file. 
Requires the db_name option to be set to select the correct db") parser.add_argument('-n', '--name', type=str, required=False, help='Sample name to include default=filename') diff --git a/tests/test_workflows.yml b/tests/test_workflows.yml index 54b7977..38535e9 100644 --- a/tests/test_workflows.yml +++ b/tests/test_workflows.yml @@ -1,9 +1,12 @@ - name: Run help command: locidex search --help + - name: Run search help command: locidex search --help + - name: Run build help command: locidex build --help + - name: Run extract help command: locidex extract --help @@ -12,3 +15,22 @@ - name: Run search command: locidex search --annotate --query locidex/example/search/NC_003198.1.fasta -d locidex/example/build_db_mlst_out -o here + +- name: Run extract + command: locidex extract -i locidex/example/search/NC_003198.1.fasta -d locidex/example/build_db_mlst_out -o here + +- name: Run extract DB selection + command: locidex extract -i locidex/example/search/NC_003198.1.fasta --db_group locidex/example/manifest_selection/dbs --db_name "Locidex Database" --db_version 1.0.0 -o here + +- name: Run all + command: > + bash -c " + locidex extract -i locidex/example/search/NC_003198.1.fasta -d locidex/example/build_db_mlst_out -o here; + locidex search --query here/raw.extracted.seqs.fasta -d locidex/example/build_db_mlst_out -o searched; + locidex report -i searched/seq_store.json -o reported + " + files: + - path: "here/raw.extracted.seqs.fasta" + - path: "searched/seq_store.json" + - path: "reported/report.json" + From 3f8ce6269c5869fd2d43c4eeaf87fcc27e55f880 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 2 May 2024 13:21:50 -0500 Subject: [PATCH 45/51] updated CI --- .github/workflows/locidex-ci-pytest-workflow.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/locidex-ci-pytest-workflow.yaml b/.github/workflows/locidex-ci-pytest-workflow.yaml index 77b0a0d..8329d74 100644 --- a/.github/workflows/locidex-ci-pytest-workflow.yaml 
+++ b/.github/workflows/locidex-ci-pytest-workflow.yaml @@ -5,9 +5,9 @@ name: Python application on: push: - branches: [ "main", "tests" ] + branches: [ "main", "tests", "dev" ] pull_request: - branches: [ "main", "tests" ] + branches: [ "main", "tests", "dev" ] permissions: contents: read From d6e80a0601aa3426e8a003477a1997f2c846bf76 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 2 May 2024 13:24:44 -0500 Subject: [PATCH 46/51] updated CI --- .github/workflows/locidex-ci-pytest-workflow.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/locidex-ci-pytest-workflow.yaml b/.github/workflows/locidex-ci-pytest-workflow.yaml index 8329d74..752054d 100644 --- a/.github/workflows/locidex-ci-pytest-workflow.yaml +++ b/.github/workflows/locidex-ci-pytest-workflow.yaml @@ -29,7 +29,7 @@ jobs: python -m pip install --upgrade pip pip install flake8 pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - pip install -e setup.py + pip install -e . - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names From e254fa61a1c17a062cb5f8cbc02ede7b1bae6079 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 2 May 2024 13:27:12 -0500 Subject: [PATCH 47/51] Added pytest-workflow to CI --- .github/workflows/locidex-ci-pytest-workflow.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/locidex-ci-pytest-workflow.yaml b/.github/workflows/locidex-ci-pytest-workflow.yaml index 752054d..68ffd9b 100644 --- a/.github/workflows/locidex-ci-pytest-workflow.yaml +++ b/.github/workflows/locidex-ci-pytest-workflow.yaml @@ -29,6 +29,7 @@ jobs: python -m pip install --upgrade pip pip install flake8 pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install pytest-workflow==2.0.1 pip install -e . 
- name: Lint with flake8 run: | From 35919767e6000a3bb6cade903de2c90ef62c1f3b Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 2 May 2024 13:28:49 -0500 Subject: [PATCH 48/51] Added pytest-workflow to CI --- .github/workflows/locidex-ci-pytest-workflow.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/locidex-ci-pytest-workflow.yaml b/.github/workflows/locidex-ci-pytest-workflow.yaml index 68ffd9b..136b10a 100644 --- a/.github/workflows/locidex-ci-pytest-workflow.yaml +++ b/.github/workflows/locidex-ci-pytest-workflow.yaml @@ -39,4 +39,4 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | - pytest -o log_cli=true --basetemp=tmp-pytest --git-aware + pytest -o log_cli=true --basetemp=tmp-pytest-CI --git-aware From a3b5fd7ff06f73233bf931563876e92bb0c87df2 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 2 May 2024 13:33:37 -0500 Subject: [PATCH 49/51] Added pytest-workflow to CI --- .github/workflows/locidex-ci-pytest-workflow.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/locidex-ci-pytest-workflow.yaml b/.github/workflows/locidex-ci-pytest-workflow.yaml index 136b10a..bbec25b 100644 --- a/.github/workflows/locidex-ci-pytest-workflow.yaml +++ b/.github/workflows/locidex-ci-pytest-workflow.yaml @@ -39,4 +39,4 @@ jobs: flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | - pytest -o log_cli=true --basetemp=tmp-pytest-CI --git-aware + pytest -o log_cli=true --basetemp=../tmp-pytest --git-aware From 672567a509c639b0a9f0991d255752fcb3cd6ebf Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 2 May 2024 13:42:47 -0500 Subject: [PATCH 50/51] altered CI workflow due to tmp dir issues --- .github/workflows/locidex-ci-pytest-workflow.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/locidex-ci-pytest-workflow.yaml b/.github/workflows/locidex-ci-pytest-workflow.yaml index bbec25b..a634deb 100644 --- a/.github/workflows/locidex-ci-pytest-workflow.yaml +++ b/.github/workflows/locidex-ci-pytest-workflow.yaml @@ -39,4 +39,4 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | - pytest -o log_cli=true --basetemp=../tmp-pytest --git-aware + pytest -o log_cli=true --git-aware From 44bd63c560583b70f0991441cf2d26d7e92cab8f Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 2 May 2024 13:49:19 -0500 Subject: [PATCH 51/51] updated manifest test to remove dependency on order --- tests/test_manifest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index ae2307a..46ec367 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -27,9 +27,9 @@ def test_db_list(): """ List all databases in a test directory """ - assert manifest.check_dbs(Path(TEST_PASS_MULTIPLE)) == [PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_three_db'), + assert set(manifest.check_dbs(Path(TEST_PASS_MULTIPLE))) == set([PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_three_db'), PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_two_db'), - PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_one_db')] + 
PosixPath('locidex/example/manifest_in/passes/pass_multiple/pass_one_db')]) @pytest.mark.parametrize("input_dir,output", [ @@ -48,7 +48,7 @@ def test_db_list(): def test_pass_validate_db_files(input_dir, output): input_path = Path(input_dir) dbs = manifest.check_dbs(input_path) - assert manifest.validate_db_files(dbs, input_path) == output + assert sorted(manifest.validate_db_files(dbs, input_path)) == sorted(output) def test_fail_validate_db_files_author(capsys): with pytest.raises(AttributeError):